2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "gromacs/commandline/filenm.h"
45 #include "gromacs/commandline/pargs.h"
46 #include "gromacs/commandline/viewit.h"
47 #include "gromacs/fileio/confio.h"
48 #include "gromacs/fileio/groio.h"
49 #include "gromacs/fileio/pdbio.h"
50 #include "gromacs/gmxana/gmx_ana.h"
51 #include "gromacs/math/do_fit.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/utilities.h"
54 #include "gromacs/math/vec.h"
55 #include "gromacs/pbcutil/rmpbc.h"
56 #include "gromacs/topology/index.h"
57 #include "gromacs/topology/topology.h"
58 #include "gromacs/utility/arraysize.h"
59 #include "gromacs/utility/cstringutil.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/futil.h"
62 #include "gromacs/utility/smalloc.h"
64 static const int NOTSET = -9368163;
66 static void calc_rm_cm(int isize, int index[], const t_atoms *atoms, rvec x[], rvec xcm)
71 /* calculate and remove center of mass of reference structure */
74 for (i = 0; i < isize; i++)
76 m = atoms->atom[index[i]].m;
77 for (d = 0; d < DIM; d++)
79 xcm[d] += m*x[index[i]][d];
83 svmul(1/tm, xcm, xcm);
84 for (i = 0; i < atoms->nr; i++)
90 static int build_res_index(int isize, int index[], t_atom atom[], int rindex[])
95 rindex[r] = atom[index[0]].resind;
97 for (i = 1; i < isize; i++)
99 if (atom[index[i]].resind != rindex[r-1])
101 rindex[r] = atom[index[i]].resind;
109 static int find_res_end(int i, int isize, int index[], const t_atoms *atoms)
113 rnr = atoms->atom[index[i]].resind;
114 while (i < isize && atoms->atom[index[i]].resind == rnr)
121 static int debug_strcmp(char s1[], char s2[])
125 fprintf(debug, " %s-%s", s1, s2);
127 return std::strcmp(s1, s2);
130 static int find_next_match_atoms_in_res(int *i1, int index1[],
131 int m1, char **atnms1[],
132 int *i2, int index2[],
133 int m2, char **atnms2[])
135 int dx, dy, dmax, cmp;
136 gmx_bool bFW = FALSE;
139 dmax = std::max(m1-*i1, m2-*i2);
140 for (dx = 0, dy = 0; dx < dmax && cmp != 0; dx++)
142 for (dy = dx; dy < dmax && cmp != 0; dy++)
151 if (*i1+dx < m1 && *i2+dy < m2)
154 cmp = debug_strcmp(*atnms1[index1[*i1+dx]], *atnms2[index2[*i2+dy]]);
157 fprintf(debug, "(%d %d)", *i1+dx, *i2+dy);
160 if (cmp != 0 && *i1+dy < m1 && *i2+dx < m2)
163 cmp = debug_strcmp(*atnms1[index1[*i1+dy]], *atnms2[index2[*i2+dx]]);
166 fprintf(debug, "(%d %d)", *i1+dy, *i2+dx);
172 /* apparently, dx and dy are incremented one more time
173 as the loops terminate: we correct this here */
180 fprintf(debug, "{%d %d}", *i1 + (bFW ? dx : dy), *i2 + (bFW ? dy : dx) );
197 static int find_next_match_res(int *rnr1, int isize1,
198 int index1[], t_resinfo *resinfo1,
199 int *rnr2, int isize2,
200 int index2[], t_resinfo *resinfo2)
202 int dx, dy, dmax, cmp, rr1, rr2;
203 gmx_bool bFW = FALSE, bFF = FALSE;
206 while (rr1 < isize1 && *rnr1 != index1[rr1])
211 while (rr2 < isize2 && *rnr2 != index2[rr2])
217 dmax = std::max(isize1-rr1, isize2-rr2);
220 fprintf(debug, " R:%d-%d:%d-%d:%d ",
221 rr1, isize1, rr2, isize2, dmax);
223 for (dx = 0; dx < dmax && cmp != 0; dx++)
225 for (dy = 0; dy <= dx && cmp != 0; dy++)
230 if (rr1+dx < isize1 && rr2+dy < isize2)
233 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
234 *resinfo2[index2[rr2+dy]].name);
237 fprintf(debug, "(%d %d)", rr1+dx, rr2+dy);
240 if (cmp != 0 && rr1+dy < isize1 && rr2+dx < isize2)
243 cmp = debug_strcmp(*resinfo1[index1[rr1+dy]].name,
244 *resinfo2[index2[rr2+dx]].name);
247 fprintf(debug, "(%d %d)", rr1+dy, rr2+dx);
250 if (dx != 0 && cmp != 0 && rr1+dx < isize1 && rr2+dx < isize2)
253 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
254 *resinfo2[index2[rr2+dx]].name);
257 fprintf(debug, "(%d %d)", rr1+dx, rr2+dx);
266 /* apparently, dx and dy are incremented one more time
267 as the loops terminate: we correct this here */
271 /* if we skipped equal on both sides, only skip one residue
272 to allow for single mutations of residues... */
277 fprintf(debug, "%d.%d.%dX%sX%s", dx, rr1, rr2,
278 *resinfo1[index1[rr1+1]].name,
279 *resinfo2[index2[rr2+1]].name);
311 static int find_first_atom_in_res(int rnr, int isize, int index[], t_atom atom[])
316 while (i < isize && atom[index[i]].resind != rnr)
321 if (atom[index[i]].resind == rnr)
331 static void find_matching_names(int *isize1, int index1[], const t_atoms *atoms1,
332 int *isize2, int index2[], const t_atoms *atoms2)
334 int i1, i2, ii1, ii2, m1, m2;
336 int rnr1, rnr2, prnr1, prnr2;
338 int *rindex1, *rindex2;
339 char ***atnms1, ***atnms2;
340 t_resinfo *resinfo1, *resinfo2;
342 /* set some handy shortcuts */
343 resinfo1 = atoms1->resinfo;
344 atnms1 = atoms1->atomname;
345 resinfo2 = atoms2->resinfo;
346 atnms2 = atoms2->atomname;
348 /* build indexes of selected residues */
349 snew(rindex1, atoms1->nres);
350 rsize1 = build_res_index(*isize1, index1, atoms1->atom, rindex1);
351 snew(rindex2, atoms2->nres);
352 rsize2 = build_res_index(*isize2, index2, atoms2->atom, rindex2);
357 prnr1 = prnr2 = NOTSET;
360 fprintf(debug, "Find matching names: %d, %d\n", *isize1, *isize2);
362 while (atcmp == 0 && i1 < *isize1 && i2 < *isize2)
365 rnr1 = atoms1->atom[index1[i1]].resind;
366 rnr2 = atoms2->atom[index2[i2]].resind;
367 if (rnr1 != prnr1 || rnr2 != prnr2)
371 fprintf(debug, "R: %s%d %s%d\n",
372 *resinfo1[rnr1].name, rnr1, *resinfo2[rnr2].name, rnr2);
374 rescmp = std::strcmp(*resinfo1[rnr1].name, *resinfo2[rnr2].name);
378 fprintf(debug, "comparing %d %d", i1, i2);
380 atcmp = debug_strcmp(*atnms1[index1[i1]], *atnms2[index2[i2]]);
383 if (atcmp != 0) /* no match -> find match within residues */
385 m1 = find_res_end(i1, *isize1, index1, atoms1);
386 m2 = find_res_end(i2, *isize2, index2, atoms2);
389 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
391 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
392 &i2, index2, m2, atnms2);
395 fprintf(debug, " -> %d %d %s-%s", i1, i2,
396 *atnms1[index1[i1]], *atnms2[index2[i2]]);
400 if (atcmp != 0) /* still no match -> next residue(s) */
404 rescmp = find_next_match_res(&rnr1, rsize1, rindex1, resinfo1,
405 &rnr2, rsize2, rindex2, resinfo2);
408 i1 = find_first_atom_in_res(rnr1, *isize1, index1, atoms1->atom);
412 i2 = find_first_atom_in_res(rnr2, *isize2, index2, atoms2->atom);
416 fprintf(debug, " -> %s%d-%s%d %s%d-%s%d",
417 *resinfo1[rnr1].name, rnr1,
418 *atnms1[index1[i1]], index1[i1],
419 *resinfo2[rnr2].name, rnr2,
420 *atnms2[index2[i2]], index2[i2]);
422 m1 = find_res_end(i1, *isize1, index1, atoms1);
423 m2 = find_res_end(i2, *isize2, index2, atoms2);
426 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
428 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
429 &i2, index2, m2, atnms2);
432 fprintf(debug, " -> %d %d %s-%s", i1, i2,
433 *atnms1[index1[i1]], *atnms2[index2[i2]]);
438 fprintf(debug, "(%d %d): %d %d\n", ii1, ii2, atcmp, rescmp);
440 if (atcmp == 0) /* if match -> store indices */
442 index1[ii1++] = index1[i1];
443 index2[ii2++] = index2[i2];
455 gmx_fatal(FARGS, "DEATH HORROR: non-equal number of matching atoms!\n");
457 if (ii1 == i1 && ii2 == i2)
459 printf("All atoms in index groups 1 and 2 match\n");
463 if (i1 == i2 && ii1 == ii2)
465 printf("Both index groups modified from %d to %d atoms\n", i1, ii1);
471 printf("Index group 1 modified from %d to %d atoms\n", i1, ii1);
475 printf("Index group 2 modified from %d to %d atoms\n", i2, ii2);
484 int gmx_confrms(int argc, char *argv[])
486 const char *desc[] = {
487 "[THISMODULE] computes the root mean square deviation (RMSD) of two",
488 "structures after least-squares fitting the second structure on the first one.",
489 "The two structures do NOT need to have the same number of atoms,",
490 "only the two index groups used for the fit need to be identical.",
491 "With [TT]-name[tt] only matching atom names from the selected groups",
492 "will be used for the fit and RMSD calculation. This can be useful ",
493 "when comparing mutants of a protein.",
495 "The superimposed structures are written to file. In a [REF].pdb[ref] file",
496 "the two structures will be written as separate models",
497 "(use [TT]rasmol -nmrpdb[tt]). Also in a [REF].pdb[ref] file, B-factors",
498 "calculated from the atomic MSD values can be written with [TT]-bfac[tt].",
500 static gmx_bool bOne = FALSE, bRmpbc = FALSE, bMW = TRUE, bName = FALSE,
501 bBfac = FALSE, bFit = TRUE, bLabel = FALSE;
504 { "-one", FALSE, etBOOL, {&bOne}, "Only write the fitted structure to file" },
505 { "-mw", FALSE, etBOOL, {&bMW}, "Mass-weighted fitting and RMSD" },
506 { "-pbc", FALSE, etBOOL, {&bRmpbc}, "Try to make molecules whole again" },
507 { "-fit", FALSE, etBOOL, {&bFit},
508 "Do least squares superposition of the target structure to the reference" },
509 { "-name", FALSE, etBOOL, {&bName},
510 "Only compare matching atom names" },
511 { "-label", FALSE, etBOOL, {&bLabel},
512 "Added chain labels A for first and B for second structure"},
513 { "-bfac", FALSE, etBOOL, {&bBfac},
514 "Output B-factors from atomic MSD values" }
517 { efTPS, "-f1", "conf1.gro", ffREAD },
518 { efSTX, "-f2", "conf2", ffREAD },
519 { efSTO, "-o", "fit.pdb", ffWRITE },
520 { efNDX, "-n1", "fit1", ffOPTRD },
521 { efNDX, "-n2", "fit2", ffOPTRD },
522 { efNDX, "-no", "match", ffOPTWR }
524 #define NFILE asize(fnm)
526 /* the two structure files */
527 const char *conf1file, *conf2file, *matchndxfile, *outfile;
530 t_topology *top1, *top2;
532 t_atoms *atoms1, *atoms2;
535 real *w_rls, mass, totmass;
536 rvec *x1, *v1, *x2, *v2, *fit_x;
539 gmx_output_env_t *oenv;
544 /* center of mass calculation */
547 /* variables for fit */
548 char *groupnames1, *groupnames2;
550 int *index1, *index2;
551 real rms, msd, minmsd, maxmsd;
555 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
556 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
560 matchndxfile = opt2fn_null("-no", NFILE, fnm);
561 conf1file = ftp2fn(efTPS, NFILE, fnm);
562 conf2file = ftp2fn(efSTX, NFILE, fnm);
564 /* reading reference structure from first structure file */
565 fprintf(stderr, "\nReading first structure file\n");
567 read_tps_conf(conf1file, top1, &ePBC1, &x1, &v1, box1, TRUE);
568 atoms1 = &(top1->atoms);
569 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
570 *top1->name, atoms1->nr, atoms1->nres);
574 rm_gropbc(atoms1, x1, box1);
577 fprintf(stderr, "Select group from first structure\n");
578 get_index(atoms1, opt2fn_null("-n1", NFILE, fnm),
579 1, &isize1, &index1, &groupnames1);
582 if (bFit && (isize1 < 3))
584 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
587 /* reading second structure file */
588 fprintf(stderr, "\nReading second structure file\n");
590 read_tps_conf(conf2file, top2, &ePBC2, &x2, &v2, box2, TRUE);
591 atoms2 = &(top2->atoms);
592 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
593 *top2->name, atoms2->nr, atoms2->nres);
597 rm_gropbc(atoms2, x2, box2);
600 fprintf(stderr, "Select group from second structure\n");
601 get_index(atoms2, opt2fn_null("-n2", NFILE, fnm),
602 1, &isize2, &index2, &groupnames2);
606 find_matching_names(&isize1, index1, atoms1, &isize2, index2, atoms2);
609 fp = gmx_ffopen(matchndxfile, "w");
610 fprintf(fp, "; Matching atoms between %s from %s and %s from %s\n",
611 groupnames1, conf1file, groupnames2, conf2file);
612 fprintf(fp, "[ Match_%s_%s ]\n", conf1file, groupnames1);
613 for (i = 0; i < isize1; i++)
615 fprintf(fp, "%4d%s", index1[i]+1, (i%15 == 14 || i == isize1-1) ? "\n" : " ");
617 fprintf(fp, "[ Match_%s_%s ]\n", conf2file, groupnames2);
618 for (i = 0; i < isize2; i++)
620 fprintf(fp, "%4d%s", index2[i]+1, (i%15 == 14 || i == isize2-1) ? "\n" : " ");
625 /* check isizes, must be equal */
626 if (isize2 != isize1)
628 gmx_fatal(FARGS, "You selected groups with differen number of atoms.\n");
631 for (i = 0; i < isize1; i++)
633 name1 = *atoms1->atomname[index1[i]];
634 name2 = *atoms2->atomname[index2[i]];
635 if (std::strcmp(name1, name2) != 0)
640 "Warning: atomnames at index %d don't match: %d %s, %d %s\n",
641 i+1, index1[i]+1, name1, index2[i]+1, name2);
647 atoms1->atom[index1[i]].m = 1;
648 atoms2->atom[index2[i]].m = 1;
653 fprintf(stderr, "%d atomname%s did not match\n", warn, (warn == 1) ? "" : "s");
658 /* calculate and remove center of mass of structures */
659 calc_rm_cm(isize1, index1, atoms1, x1, xcm1);
660 calc_rm_cm(isize2, index2, atoms2, x2, xcm2);
662 snew(w_rls, atoms2->nr);
663 snew(fit_x, atoms2->nr);
664 for (at = 0; (at < isize1); at++)
666 w_rls[index2[at]] = atoms1->atom[index1[at]].m;
667 copy_rvec(x1[index1[at]], fit_x[index2[at]]);
670 /* do the least squares fit to the reference structure */
671 do_fit(atoms2->nr, w_rls, fit_x, x2);
684 /* calculate the rms deviation */
690 for (at = 0; at < isize1; at++)
692 mass = atoms1->atom[index1[at]].m;
693 for (m = 0; m < DIM; m++)
695 msd = gmx::square(x1[index1[at]][m] - x2[index2[at]][m]);
699 maxmsd = std::max(maxmsd, msds[at]);
700 minmsd = std::min(minmsd, msds[at]);
703 rms = std::sqrt(rms/totmass);
705 printf("Root mean square deviation after lsq fit = %g nm\n", rms);
708 printf("Atomic MSD's range from %g to %g nm^2\n", minmsd, maxmsd);
713 /* reset coordinates of reference and fitted structure */
714 for (i = 0; i < atoms1->nr; i++)
716 for (m = 0; m < DIM; m++)
721 for (i = 0; i < atoms2->nr; i++)
723 for (m = 0; m < DIM; m++)
730 outfile = ftp2fn(efSTO, NFILE, fnm);
731 switch (fn2ftp(outfile))
738 srenew(atoms1->pdbinfo, atoms1->nr);
739 srenew(atoms1->atom, atoms1->nr); /* Why renew atom? */
741 atoms1->havePdbInfo = TRUE;
743 /* Avoid segfaults when writing the pdb-file */
744 for (i = 0; i < atoms1->nr; i++)
746 atoms1->pdbinfo[i].type = eptAtom;
747 atoms1->pdbinfo[i].occup = 1.00;
748 atoms1->pdbinfo[i].bAnisotropic = FALSE;
751 atoms1->pdbinfo[i].bfac = 0;
755 atoms1->resinfo[atoms1->atom[i].resind].chainid = 'A';
759 for (i = 0; i < isize1; i++)
761 /* atoms1->pdbinfo[index1[i]].type = eptAtom; */
762 /* atoms1->pdbinfo[index1[i]].bAnisotropic = FALSE; */
765 atoms1->pdbinfo[index1[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
768 /* atoms1->resinfo[atoms1->atom[index1[i]].resind].chain = 'A'; */
770 srenew(atoms2->pdbinfo, atoms2->nr);
771 srenew(atoms2->atom, atoms2->nr); /* Why renew atom? */
773 for (i = 0; i < atoms2->nr; i++)
775 atoms2->pdbinfo[i].type = eptAtom;
776 atoms2->pdbinfo[i].occup = 1.00;
777 atoms2->pdbinfo[i].bAnisotropic = FALSE;
780 atoms2->pdbinfo[i].bfac = 0;
784 atoms2->resinfo[atoms1->atom[i].resind].chainid = 'B';
788 for (i = 0; i < isize2; i++)
790 /* atoms2->pdbinfo[index2[i]].type = eptAtom; */
791 /* atoms2->pdbinfo[index2[i]].bAnisotropic = FALSE; */
794 atoms2->pdbinfo[index2[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
797 /* atoms2->resinfo[atoms2->atom[index2[i]].resind].chain = 'B'; */
800 fp = gmx_ffopen(outfile, "w");
803 write_pdbfile(fp, *top1->name, atoms1, x1, ePBC1, box1, ' ', 1, nullptr, TRUE);
805 write_pdbfile(fp, *top2->name, atoms2, x2, ePBC2, box2, ' ', bOne ? -1 : 2, nullptr, TRUE);
811 fprintf(stderr, "WARNING: cannot write B-factor values to gro file\n");
813 fp = gmx_ffopen(outfile, "w");
816 write_hconf_p(fp, *top1->name, atoms1, x1, v1, box1);
818 write_hconf_p(fp, *top2->name, atoms2, x2, v2, box2);
824 fprintf(stderr, "WARNING: cannot write B-factor values to %s file\n",
825 ftp2ext(fn2ftp(outfile)));
830 "WARNING: cannot write the reference structure to %s file\n",
831 ftp2ext(fn2ftp(outfile)));
833 write_sto_conf(outfile, *top2->name, atoms2, x2, v2, ePBC2, box2);
837 view_all(oenv, NFILE, fnm);