2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "gromacs/topology/index.h"
50 #include "gromacs/legacyheaders/macros.h"
51 #include "gromacs/legacyheaders/txtdump.h"
53 #include "gromacs/fileio/gmxfio.h"
54 #include "gromacs/fileio/strdb.h"
55 #include "gromacs/topology/atoms.h"
56 #include "gromacs/topology/block.h"
57 #include "gromacs/topology/invblock.h"
58 #include "gromacs/topology/residuetypes.h"
59 #include "gromacs/utility/cstringutil.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/smalloc.h"
63 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
71 c = toupper(fgetc(stdin));
73 while ((c != 'Y') && (c != 'N'));
83 void write_index(const char *outf, t_blocka *b, char **gnames, gmx_bool bDuplicate, int natoms)
88 out = gmx_fio_fopen(outf, "w");
89 /* fprintf(out,"%5d %5d\n",b->nr,b->nra); */
90 for (i = 0; (i < b->nr); i++)
92 fprintf(out, "[ %s ]\n", gnames[i]);
93 for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
95 fprintf(out, "%4d ", b->a[j]+1);
104 /* Duplicate copy, useful for computational electrophysiology double-layer setups */
107 fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
108 for (i = 0; (i < b->nr); i++)
110 fprintf(out, "[ %s_copy ]\n", gnames[i]);
111 for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
113 fprintf(out, "%4d ", b->a[j]+1 + natoms );
126 void add_grp(t_blocka *b, char ***gnames, int nra, atom_id a[], const char *name)
130 srenew(b->index, b->nr+2);
131 srenew(*gnames, b->nr+1);
132 (*gnames)[b->nr] = gmx_strdup(name);
134 srenew(b->a, b->nra+nra);
135 for (i = 0; (i < nra); i++)
137 b->a[b->nra++] = a[i];
140 b->index[b->nr] = b->nra;
143 /* compare index in `a' with group in `b' at `index',
144 when `index'<0 it is relative to end of `b' */
145 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
151 index = b->nr-1+index;
155 gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
158 if (nra != b->index[index+1] - b->index[index])
162 for (i = 0; i < nra; i++)
164 if (a[i] != b->a[b->index[index]+i])
173 p_status(const char **restype, int nres, const char **typenames, int ntypes)
178 snew(counter, ntypes);
179 for (i = 0; i < ntypes; i++)
183 for (i = 0; i < nres; i++)
185 for (j = 0; j < ntypes; j++)
187 if (!gmx_strcasecmp(restype[i], typenames[j]))
194 for (i = 0; (i < ntypes); i++)
198 printf("There are: %5d %10s residues\n", counter[i], typenames[i]);
207 mk_aid(t_atoms *atoms, const char ** restype, const char * typestring, int *nra, gmx_bool bMatch)
208 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
216 for (i = 0; (i < atoms->nr); i++)
218 res = !gmx_strcasecmp(restype[atoms->atom[i].resind], typestring);
238 static void analyse_other(const char ** restype, t_atoms *atoms,
239 t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
241 restp_t *restp = NULL;
244 atom_id *other_ndx, *aid, *aaid;
245 int i, j, k, l, resind, naid, naaid, natp, nrestp = 0;
247 for (i = 0; (i < atoms->nres); i++)
249 if (gmx_strcasecmp(restype[i], "Protein") && gmx_strcasecmp(restype[i], "DNA") && gmx_strcasecmp(restype[i], "RNA") && gmx_strcasecmp(restype[i], "Water"))
259 printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
261 snew(other_ndx, atoms->nr);
262 for (k = 0; (k < atoms->nr); k++)
264 resind = atoms->atom[k].resind;
265 rname = *atoms->resinfo[resind].name;
266 if (gmx_strcasecmp(restype[resind], "Protein") && gmx_strcasecmp(restype[resind], "DNA") &&
267 gmx_strcasecmp(restype[resind], "RNA") && gmx_strcasecmp(restype[resind], "Water"))
270 for (l = 0; (l < nrestp); l++)
272 if (strcmp(restp[l].rname, rname) == 0)
279 srenew(restp, nrestp+1);
280 restp[nrestp].rname = gmx_strdup(rname);
281 restp[nrestp].bNeg = FALSE;
282 restp[nrestp].gname = gmx_strdup(rname);
288 for (i = 0; (i < nrestp); i++)
290 snew(aid, atoms->nr);
292 for (j = 0; (j < atoms->nr); j++)
294 rname = *atoms->resinfo[atoms->atom[j].resind].name;
295 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
296 (strcmp(restp[i].rname, rname) != 0 && restp[i].bNeg))
301 add_grp(gb, gn, naid, aid, restp[i].gname);
304 printf("split %s into atoms (y/n) ? ", restp[i].gname);
306 if (gmx_ask_yesno(bASK))
309 for (k = 0; (k < naid); k++)
311 aname = *atoms->atomname[aid[k]];
312 for (l = 0; (l < natp); l++)
314 if (strcmp(aname, attp[l]) == 0)
321 srenew(attp, ++natp);
322 attp[natp-1] = aname;
327 for (l = 0; (l < natp); l++)
331 for (k = 0; (k < naid); k++)
333 aname = *atoms->atomname[aid[k]];
334 if (strcmp(aname, attp[l]) == 0)
336 aaid[naaid++] = aid[k];
339 add_grp(gb, gn, naaid, aaid, attp[l]);
354 * Cata necessary to construct a single (protein) index group in
357 typedef struct gmx_help_make_index_group
359 /** The set of atom names that will be used to form this index group */
360 const char **defining_atomnames;
361 /** Size of the defining_atomnames array */
362 int num_defining_atomnames;
363 /** Name of this index group */
364 const char *group_name;
365 /** Whether the above atom names name the atoms in the group, or
366 those not in the group */
367 gmx_bool bTakeComplement;
368 /** The index in wholename gives the first item in the arrays of
369 atomnames that should be tested with 'gmx_strncasecmp' in stead of
370 gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
371 This is comparable to using a '*' wildcard at the end of specific
372 atom names, but that is more involved to implement...
375 /** Only create this index group if it differs from the one specified in compareto,
376 where -1 means to always create this group. */
378 } t_gmx_help_make_index_group;
380 static void analyse_prot(const char ** restype, t_atoms *atoms,
381 t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
383 /* lists of atomnames to be used in constructing index groups: */
384 static const char *pnoh[] = { "H", "HN" };
385 static const char *pnodum[] = {
386 "MN1", "MN2", "MCB1", "MCB2", "MCG1", "MCG2",
387 "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
389 static const char *calpha[] = { "CA" };
390 static const char *bb[] = { "N", "CA", "C" };
391 static const char *mc[] = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
392 static const char *mcb[] = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
393 static const char *mch[] = {
394 "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
395 "H1", "H2", "H3", "H", "HN"
398 static const t_gmx_help_make_index_group constructing_data[] =
399 {{ NULL, 0, "Protein", TRUE, -1, -1},
400 { pnoh, asize(pnoh), "Protein-H", TRUE, 0, -1},
401 { calpha, asize(calpha), "C-alpha", FALSE, -1, -1},
402 { bb, asize(bb), "Backbone", FALSE, -1, -1},
403 { mc, asize(mc), "MainChain", FALSE, -1, -1},
404 { mcb, asize(mcb), "MainChain+Cb", FALSE, -1, -1},
405 { mch, asize(mch), "MainChain+H", FALSE, -1, -1},
406 { mch, asize(mch), "SideChain", TRUE, -1, -1},
407 { mch, asize(mch), "SideChain-H", TRUE, 11, -1},
408 { pnodum, asize(pnodum), "Prot-Masses", TRUE, -1, 0}, };
409 const int num_index_groups = asize(constructing_data);
415 char ndx_name[STRLEN], *atnm;
420 printf("Analysing Protein...\n");
422 snew(aid, atoms->nr);
424 /* calculate the number of protein residues */
426 for (i = 0; (i < atoms->nres); i++)
428 if (0 == gmx_strcasecmp(restype[i], "Protein"))
433 /* find matching or complement atoms */
434 for (i = 0; (i < (int)num_index_groups); i++)
437 for (n = 0; (n < atoms->nr); n++)
439 if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind], "Protein"))
442 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
444 /* skip digits at beginning of atomname, e.g. 1H */
445 atnm = *atoms->atomname[n];
446 while (isdigit(atnm[0]))
450 if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
452 if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
459 if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
465 if (constructing_data[i].bTakeComplement != match)
471 /* if we want to add this group always or it differs from previous
473 if (-1 == constructing_data[i].compareto || !grp_cmp(gb, nra, aid, constructing_data[i].compareto-i) )
475 add_grp(gb, gn, nra, aid, constructing_data[i].group_name);
481 for (i = 0; (i < (int)num_index_groups); i++)
483 printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
484 if (gmx_ask_yesno(bASK))
488 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
490 resind = atoms->atom[n].resind;
491 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
494 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
496 if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
501 if (constructing_data[i].bTakeComplement != match)
506 /* copy the residuename to the tail of the groupname */
510 ri = &atoms->resinfo[resind];
511 sprintf(ndx_name, "%s_%s%d%c",
512 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
513 add_grp(gb, gn, nra, aid, ndx_name);
519 printf("Make group with sidechain and C=O swapped (y/n) ? ");
520 if (gmx_ask_yesno(bASK))
522 /* Make swap sidechain C=O index */
525 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
527 resind = atoms->atom[n].resind;
529 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
531 if (strcmp("CA", *atoms->atomname[n]) == 0)
537 else if (strcmp("C", *atoms->atomname[n]) == 0)
541 gmx_incons("Atom naming problem");
545 else if (strcmp("O", *atoms->atomname[n]) == 0)
549 gmx_incons("Atom naming problem");
553 else if (strcmp("O1", *atoms->atomname[n]) == 0)
557 gmx_incons("Atom naming problem");
567 /* copy the residuename to the tail of the groupname */
570 add_grp(gb, gn, nra, aid, "SwapSC-CO");
578 void analyse(t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
580 gmx_residuetype_t*rt = NULL;
583 const char ** restype;
587 const char ** p_typename;
594 printf("Analysing residue names:\n");
596 /* Create system group, every single atom */
597 snew(aid, atoms->nr);
598 for (i = 0; i < atoms->nr; i++)
602 add_grp(gb, gn, atoms->nr, aid, "System");
605 /* For every residue, get a pointer to the residue type name */
606 gmx_residuetype_init(&rt);
609 snew(restype, atoms->nres);
612 for (i = 0; i < atoms->nres; i++)
614 resnm = *atoms->resinfo[i].name;
615 gmx_residuetype_get_type(rt, resnm, &(restype[i]));
617 /* Note that this does not lead to a N*N loop, but N*K, where
618 * K is the number of residue _types_, which is small and independent of N.
621 for (k = 0; k < ntypes && !found; k++)
623 assert(p_typename != NULL);
624 found = !strcmp(restype[i], p_typename[k]);
628 srenew(p_typename, ntypes+1);
629 p_typename[ntypes++] = gmx_strdup(restype[i]);
635 p_status(restype, atoms->nres, p_typename, ntypes);
638 for (k = 0; k < ntypes; k++)
640 aid = mk_aid(atoms, restype, p_typename[k], &nra, TRUE);
642 /* Check for special types to do fancy stuff with */
644 if (!gmx_strcasecmp(p_typename[k], "Protein") && nra > 0)
648 analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
650 /* Create a Non-Protein group */
651 aid = mk_aid(atoms, restype, "Protein", &nra, FALSE);
652 if ((nra > 0) && (nra < atoms->nr))
654 add_grp(gb, gn, nra, aid, "non-Protein");
658 else if (!gmx_strcasecmp(p_typename[k], "Water") && nra > 0)
660 add_grp(gb, gn, nra, aid, p_typename[k]);
661 /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
662 add_grp(gb, gn, nra, aid, "SOL");
666 /* Solvent, create a negated group too */
667 aid = mk_aid(atoms, restype, "Water", &nra, FALSE);
668 if ((nra > 0) && (nra < atoms->nr))
670 add_grp(gb, gn, nra, aid, "non-Water");
677 add_grp(gb, gn, nra, aid, p_typename[k]);
679 analyse_other(restype, atoms, gb, gn, bASK, bVerb);
685 gmx_residuetype_destroy(rt);
687 /* Create a merged water_and_ions group */
693 for (i = 0; i < gb->nr; i++)
695 if (!gmx_strcasecmp((*gn)[i], "Water"))
698 nwater = gb->index[i+1]-gb->index[i];
700 else if (!gmx_strcasecmp((*gn)[i], "Ion"))
703 nion = gb->index[i+1]-gb->index[i];
707 if (nwater > 0 && nion > 0)
709 srenew(gb->index, gb->nr+2);
710 srenew(*gn, gb->nr+1);
711 (*gn)[gb->nr] = gmx_strdup("Water_and_ions");
712 srenew(gb->a, gb->nra+nwater+nion);
715 for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
717 gb->a[gb->nra++] = gb->a[i];
722 for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
724 gb->a[gb->nra++] = gb->a[i];
728 gb->index[gb->nr] = gb->nra;
733 void check_index(char *gname, int n, atom_id index[], char *traj, int natoms)
737 for (i = 0; i < n; i++)
739 if (index[i] >= natoms)
741 gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
742 gname ? gname : "Index", i+1, index[i]+1,
743 traj ? traj : "the trajectory", natoms);
745 else if (index[i] < 0)
747 gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
748 gname ? gname : "Index", i+1, index[i]+1);
753 t_blocka *init_index(const char *gfile, char ***grpname)
759 char line[STRLEN], *pt, str[STRLEN];
761 in = gmx_fio_fopen(gfile, "r");
769 while (get_a_line(in, line, STRLEN))
771 if (get_header(line, str))
774 srenew(b->index, b->nr+1);
775 srenew(*grpname, b->nr);
780 b->index[b->nr] = b->index[b->nr-1];
781 (*grpname)[b->nr-1] = gmx_strdup(str);
787 gmx_fatal(FARGS, "The first header of your indexfile is invalid");
790 while (sscanf(pt, "%s", str) == 1)
796 srenew(b->a, maxentries);
798 assert(b->a != NULL); // for clang analyzer
799 b->a[i] = strtol(str, NULL, 10)-1;
802 pt = strstr(pt, str)+strlen(str);
808 for (i = 0; (i < b->nr); i++)
810 assert(b->a != NULL); // for clang analyzer
811 for (j = b->index[i]; (j < b->index[i+1]); j++)
815 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
816 b->a[j], (*grpname)[i]);
824 static void minstring(char *str)
828 for (i = 0; (i < (int)strlen(str)); i++)
837 int find_group(char s[], int ngrps, char **grpname)
846 /* first look for whole name match */
849 for (i = 0; i < ngrps; i++)
851 if (gmx_strcasecmp_min(s, grpname[i]) == 0)
861 /* second look for first string match */
864 for (i = 0; i < ngrps; i++)
866 if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
876 /* last look for arbitrary substring match */
881 for (i = 0; i < ngrps; i++)
883 strcpy(string, grpname[i]);
886 if (strstr(string, s) != NULL)
898 printf("Error: Multiple groups '%s' selected\n", s);
904 static int qgroup(int *a, int ngrps, char **grpname)
913 fprintf(stderr, "Select a group: ");
916 if (scanf("%s", s) != 1)
918 gmx_fatal(FARGS, "Cannot read from input");
920 trim(s); /* remove spaces */
922 while (strlen(s) == 0);
923 aa = strtol(s, &end, 10);
924 if (aa == 0 && end[0] != '\0') /* string entered */
926 aa = find_group(s, ngrps, grpname);
928 bInRange = (aa >= 0 && aa < ngrps);
931 printf("Error: No such group '%s'\n", s);
935 printf("Selected %d: '%s'\n", aa, grpname[aa]);
940 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
941 int ngrps, int isize[], atom_id *index[], int grpnr[])
947 gmx_fatal(FARGS, "Error: no groups in indexfile");
949 for (i = 0; (i < grps->nr); i++)
951 fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
952 grps->index[i+1]-grps->index[i]);
954 for (i = 0; (i < ngrps); i++)
960 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
961 if ((gnr1 < 0) || (gnr1 >= grps->nr))
963 fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
966 while ((gnr1 < 0) || (gnr1 >= grps->nr));
970 fprintf(stderr, "There is one group in the index\n");
973 gnames[i] = gmx_strdup(grpname[gnr1]);
974 isize[i] = grps->index[gnr1+1]-grps->index[gnr1];
975 snew(index[i], isize[i]);
976 for (j = 0; (j < isize[i]); j++)
978 index[i][j] = grps->a[grps->index[gnr1]+j];
983 void rd_index(const char *statfile, int ngrps, int isize[],
984 atom_id *index[], char *grpnames[])
993 gmx_fatal(FARGS, "No index file specified");
995 grps = init_index(statfile, &gnames);
996 rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
999 void rd_index_nrs(char *statfile, int ngrps, int isize[],
1000 atom_id *index[], char *grpnames[], int grpnr[])
1007 gmx_fatal(FARGS, "No index file specified");
1009 grps = init_index(statfile, &gnames);
1011 rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1014 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1015 int isize[], atom_id *index[], char *grpnames[])
1018 t_blocka *grps = NULL;
1025 grps = init_index(fnm, gnames);
1030 snew(grps->index, 1);
1031 analyse(atoms, grps, gnames, FALSE, FALSE);
1035 gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1038 rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1041 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1047 c->clust = init_index(ndx, &c->grpname);
1049 for (i = 0; (i < c->clust->nra); i++)
1051 c->maxframe = std::max(c->maxframe, c->clust->a[i]);
1053 fprintf(fplog ? fplog : stdout,
1054 "There are %d clusters containing %d structures, highest framenr is %d\n",
1055 c->clust->nr, c->clust->nra, c->maxframe);
1058 pr_blocka(debug, 0, "clust", c->clust, TRUE);
1059 for (i = 0; (i < c->clust->nra); i++)
1061 if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1063 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1064 "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1068 c->inv_clust = make_invblocka(c->clust, c->maxframe);