src/gromacs/gmxlib/index.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <assert.h>
  42 #include <ctype.h>
  43 #include <stdlib.h>
  44 #include <string.h>
  45
  46 #include "macros.h"
  47 #include "names.h"
  48 #include "typedefs.h"
  49 #include "macros.h"
  50 #include "index.h"
  51 #include "txtdump.h"
  52
  53 #include "gromacs/fileio/gmxfio.h"
  54 #include "gromacs/fileio/strdb.h"
  55 #include "gromacs/topology/invblock.h"
  56 #include "gromacs/utility/cstringutil.h"
  57 #include "gromacs/utility/fatalerror.h"
  58 #include "gromacs/utility/futil.h"
  59 #include "gromacs/utility/smalloc.h"
  60
  61 const char gmx_residuetype_undefined[] = "Other";
  62
  63 struct gmx_residuetype
  64 {
  65     int      n;
  66     char **  resname;
  67     char **  restype;
  68
  69 };
  70
  71
  72 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  73 {
  74     char c;
  75
  76     if (bASK)
  77     {
  78         do
  79         {
  80             c = toupper(fgetc(stdin));
  81         }
  82         while ((c != 'Y') && (c != 'N'));
  83
  84         return (c == 'Y');
  85     }
  86     else
  87     {
  88         return FALSE;
  89     }
  90 }
  91
  92 void write_index(const char *outf, t_blocka *b, char **gnames, gmx_bool bDuplicate, int natoms)
  93 {
  94     FILE *out;
  95     int   i, j, k;
  96
  97     out = gmx_fio_fopen(outf, "w");
  98     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
  99     for (i = 0; (i < b->nr); i++)
 100     {
 101         fprintf(out, "[ %s ]\n", gnames[i]);
 102         for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 103         {
 104             fprintf(out, "%4d ", b->a[j]+1);
 105             if ((k % 15) == 14)
 106             {
 107                 fprintf(out, "\n");
 108             }
 109         }
 110         fprintf(out, "\n");
 111     }
 112
 113     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
 114     if (bDuplicate)
 115     {
 116         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 117         for (i = 0; (i < b->nr); i++)
 118         {
 119             fprintf(out, "[ %s_copy ]\n", gnames[i]);
 120             for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 121             {
 122                 fprintf(out, "%4d ", b->a[j]+1 + natoms );
 123                 if ((k % 15) == 14)
 124                 {
 125                     fprintf(out, "\n");
 126                 }
 127             }
 128             fprintf(out, "\n");
 129         }
 130     }
 131
 132     gmx_fio_fclose(out);
 133 }
 134
 135 void add_grp(t_blocka *b, char ***gnames, int nra, atom_id a[], const char *name)
 136 {
 137     int i;
 138
 139     srenew(b->index, b->nr+2);
 140     srenew(*gnames, b->nr+1);
 141     (*gnames)[b->nr] = strdup(name);
 142
 143     srenew(b->a, b->nra+nra);
 144     for (i = 0; (i < nra); i++)
 145     {
 146         b->a[b->nra++] = a[i];
 147     }
 148     b->nr++;
 149     b->index[b->nr] = b->nra;
 150 }
 151
 152 /* compare index in `a' with group in `b' at `index',
 153    when `index'<0 it is relative to end of `b' */
 154 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 155 {
 156     int i;
 157
 158     if (index < 0)
 159     {
 160         index = b->nr-1+index;
 161     }
 162     if (index >= b->nr)
 163     {
 164         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 165     }
 166     /* compare sizes */
 167     if (nra != b->index[index+1] - b->index[index])
 168     {
 169         return FALSE;
 170     }
 171     for (i = 0; i < nra; i++)
 172     {
 173         if (a[i] != b->a[b->index[index]+i])
 174         {
 175             return FALSE;
 176         }
 177     }
 178     return TRUE;
 179 }
 180
 181 static void
 182 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 183 {
 184     int   i, j;
 185     int   found;
 186
 187     int * counter;
 188
 189     snew(counter, ntypes);
 190     for (i = 0; i < ntypes; i++)
 191     {
 192         counter[i] = 0;
 193     }
 194     for (i = 0; i < nres; i++)
 195     {
 196         found = 0;
 197         for (j = 0; j < ntypes; j++)
 198         {
 199             if (!gmx_strcasecmp(restype[i], typenames[j]))
 200             {
 201                 counter[j]++;
 202             }
 203         }
 204     }
 205
 206     for (i = 0; (i < ntypes); i++)
 207     {
 208         if (counter[i] > 0)
 209         {
 210             printf("There are: %5d %10s residues\n", counter[i], typenames[i]);
 211         }
 212     }
 213
 214     sfree(counter);
 215 }
 216
 217
 218 atom_id *
 219 mk_aid(t_atoms *atoms, const char ** restype, const char * typestring, int *nra, gmx_bool bMatch)
 220 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 221 {
 222     atom_id *a;
 223     int      i;
 224     int      res;
 225
 226     snew(a, atoms->nr);
 227     *nra = 0;
 228     for (i = 0; (i < atoms->nr); i++)
 229     {
 230         res = !gmx_strcasecmp(restype[atoms->atom[i].resind], typestring);
 231         if (bMatch == FALSE)
 232         {
 233             res = !res;
 234         }
 235         if (res)
 236         {
 237             a[(*nra)++] = i;
 238         }
 239     }
 240
 241     return a;
 242 }
 243
 244 typedef struct {
 245     char    *rname;
 246     gmx_bool bNeg;
 247     char    *gname;
 248 } restp_t;
 249
 250 static void analyse_other(const char ** restype, t_atoms *atoms,
 251                           t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 252 {
 253     restp_t *restp = NULL;
 254     char   **attp  = NULL;
 255     char    *rname, *aname;
 256     atom_id *other_ndx, *aid, *aaid;
 257     int      i, j, k, l, resind, naid, naaid, natp, nrestp = 0;
 258
 259     for (i = 0; (i < atoms->nres); i++)
 260     {
 261         if (gmx_strcasecmp(restype[i], "Protein") && gmx_strcasecmp(restype[i], "DNA") && gmx_strcasecmp(restype[i], "RNA") && gmx_strcasecmp(restype[i], "Water"))
 262         {
 263             break;
 264         }
 265     }
 266     if (i < atoms->nres)
 267     {
 268         /* we have others */
 269         if (bVerb)
 270         {
 271             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 272         }
 273         snew(other_ndx, atoms->nr);
 274         for (k = 0; (k < atoms->nr); k++)
 275         {
 276             resind = atoms->atom[k].resind;
 277             rname  = *atoms->resinfo[resind].name;
 278             if (gmx_strcasecmp(restype[resind], "Protein") && gmx_strcasecmp(restype[resind], "DNA") &&
 279                 gmx_strcasecmp(restype[resind], "RNA") && gmx_strcasecmp(restype[resind], "Water"))
 280             {
 281
 282                 for (l = 0; (l < nrestp); l++)
 283                 {
 284                     if (strcmp(restp[l].rname, rname) == 0)
 285                     {
 286                         break;
 287                     }
 288                 }
 289                 if (l == nrestp)
 290                 {
 291                     srenew(restp, nrestp+1);
 292                     restp[nrestp].rname = strdup(rname);
 293                     restp[nrestp].bNeg  = FALSE;
 294                     restp[nrestp].gname = strdup(rname);
 295                     nrestp++;
 296
 297                 }
 298             }
 299         }
 300         for (i = 0; (i < nrestp); i++)
 301         {
 302             snew(aid, atoms->nr);
 303             naid = 0;
 304             for (j = 0; (j < atoms->nr); j++)
 305             {
 306                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 307                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
 308                     (strcmp(restp[i].rname, rname) != 0 &&  restp[i].bNeg))
 309                 {
 310                     aid[naid++] = j;
 311                 }
 312             }
 313             add_grp(gb, gn, naid, aid, restp[i].gname);
 314             if (bASK)
 315             {
 316                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 317                 fflush(stdout);
 318                 if (gmx_ask_yesno(bASK))
 319                 {
 320                     natp = 0;
 321                     for (k = 0; (k < naid); k++)
 322                     {
 323                         aname = *atoms->atomname[aid[k]];
 324                         for (l = 0; (l < natp); l++)
 325                         {
 326                             if (strcmp(aname, attp[l]) == 0)
 327                             {
 328                                 break;
 329                             }
 330                         }
 331                         if (l == natp)
 332                         {
 333                             srenew(attp, ++natp);
 334                             attp[natp-1] = aname;
 335                         }
 336                     }
 337                     if (natp > 1)
 338                     {
 339                         for (l = 0; (l < natp); l++)
 340                         {
 341                             snew(aaid, naid);
 342                             naaid = 0;
 343                             for (k = 0; (k < naid); k++)
 344                             {
 345                                 aname = *atoms->atomname[aid[k]];
 346                                 if (strcmp(aname, attp[l]) == 0)
 347                                 {
 348                                     aaid[naaid++] = aid[k];
 349                                 }
 350                             }
 351                             add_grp(gb, gn, naaid, aaid, attp[l]);
 352                             sfree(aaid);
 353                         }
 354                     }
 355                     sfree(attp);
 356                     attp = NULL;
 357                 }
 358                 sfree(aid);
 359             }
 360         }
 361         sfree(other_ndx);
 362     }
 363 }
 364
 365 /*! \brief
 366  * Cata necessary to construct a single (protein) index group in
 367  * analyse_prot().
 368  */
 369 typedef struct gmx_help_make_index_group
 370 {
 371     /** The set of atom names that will be used to form this index group */
 372     const char **defining_atomnames;
 373     /** Size of the defining_atomnames array */
 374     const int    num_defining_atomnames;
 375     /** Name of this index group */
 376     const char  *group_name;
 377     /** Whether the above atom names name the atoms in the group, or
 378         those not in the group */
 379     gmx_bool bTakeComplement;
 380     /** The index in wholename gives the first item in the arrays of
 381        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 382        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 383        This is comparable to using a '*' wildcard at the end of specific
 384        atom names, but that is more involved to implement...
 385      */
 386     int wholename;
 387     /** Only create this index group if it differs from the one specified in compareto,
 388        where -1 means to always create this group. */
 389     int compareto;
 390 } t_gmx_help_make_index_group;
 391
 392 static void analyse_prot(const char ** restype, t_atoms *atoms,
 393                          t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 394 {
 395     /* lists of atomnames to be used in constructing index groups: */
 396     static const char *pnoh[]    = { "H", "HN" };
 397     static const char *pnodum[]  = {
 398         "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 399         "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
 400     };
 401     static const char *calpha[]  = { "CA" };
 402     static const char *bb[]      = { "N", "CA", "C" };
 403     static const char *mc[]      = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 404     static const char *mcb[]     = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 405     static const char *mch[]     = {
 406         "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
 407         "H1", "H2", "H3", "H", "HN"
 408     };
 409
 410     static const t_gmx_help_make_index_group constructing_data[] =
 411     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 412      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 413      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 414      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 415      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 416      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 417      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 418      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 419      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 420      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0}, };
 421     const int   num_index_groups = asize(constructing_data);
 422
 423     int         n, j;
 424     atom_id    *aid;
 425     int         nra, nnpres, npres;
 426     gmx_bool    match;
 427     char        ndx_name[STRLEN], *atnm;
 428     int         i;
 429
 430     if (bVerb)
 431     {
 432         printf("Analysing Protein...\n");
 433     }
 434     snew(aid, atoms->nr);
 435
 436     /* calculate the number of protein residues */
 437     npres = 0;
 438     for (i = 0; (i < atoms->nres); i++)
 439     {
 440         if (0 == gmx_strcasecmp(restype[i], "Protein"))
 441         {
 442             npres++;
 443         }
 444     }
 445     /* find matching or complement atoms */
 446     for (i = 0; (i < (int)num_index_groups); i++)
 447     {
 448         nra = 0;
 449         for (n = 0; (n < atoms->nr); n++)
 450         {
 451             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind], "Protein"))
 452             {
 453                 match = FALSE;
 454                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 455                 {
 456                     /* skip digits at beginning of atomname, e.g. 1H */
 457                     atnm = *atoms->atomname[n];
 458                     while (isdigit(atnm[0]))
 459                     {
 460                         atnm++;
 461                     }
 462                     if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
 463                     {
 464                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 465                         {
 466                             match = TRUE;
 467                         }
 468                     }
 469                     else
 470                     {
 471                         if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
 472                         {
 473                             match = TRUE;
 474                         }
 475                     }
 476                 }
 477                 if (constructing_data[i].bTakeComplement != match)
 478                 {
 479                     aid[nra++] = n;
 480                 }
 481             }
 482         }
 483         /* if we want to add this group always or it differs from previous
 484            group, add it: */
 485         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, nra, aid, constructing_data[i].compareto-i) )
 486         {
 487             add_grp(gb, gn, nra, aid, constructing_data[i].group_name);
 488         }
 489     }
 490
 491     if (bASK)
 492     {
 493         for (i = 0; (i < (int)num_index_groups); i++)
 494         {
 495             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 496             if (gmx_ask_yesno(bASK))
 497             {
 498                 int resind;
 499                 nra = 0;
 500                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 501                 {
 502                     resind = atoms->atom[n].resind;
 503                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 504                     {
 505                         match = FALSE;
 506                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 507                         {
 508                             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
 509                             {
 510                                 match = TRUE;
 511                             }
 512                         }
 513                         if (constructing_data[i].bTakeComplement != match)
 514                         {
 515                             aid[nra++] = n;
 516                         }
 517                     }
 518                     /* copy the residuename to the tail of the groupname */
 519                     if (nra > 0)
 520                     {
 521                         t_resinfo *ri;
 522                         ri = &atoms->resinfo[resind];
 523                         sprintf(ndx_name, "%s_%s%d%c",
 524                                 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 525                         add_grp(gb, gn, nra, aid, ndx_name);
 526                         nra = 0;
 527                     }
 528                 }
 529             }
 530         }
 531         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 532         if (gmx_ask_yesno(bASK))
 533         {
 534             /* Make swap sidechain C=O index */
 535             int resind, hold;
 536             nra = 0;
 537             for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 538             {
 539                 resind = atoms->atom[n].resind;
 540                 hold   = -1;
 541                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 542                 {
 543                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 544                     {
 545                         aid[nra++] = n;
 546                         hold       = nra;
 547                         nra       += 2;
 548                     }
 549                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 550                     {
 551                         if (hold == -1)
 552                         {
 553                             gmx_incons("Atom naming problem");
 554                         }
 555                         aid[hold] = n;
 556                     }
 557                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 558                     {
 559                         if (hold == -1)
 560                         {
 561                             gmx_incons("Atom naming problem");
 562                         }
 563                         aid[hold+1] = n;
 564                     }
 565                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 566                     {
 567                         if (hold == -1)
 568                         {
 569                             gmx_incons("Atom naming problem");
 570                         }
 571                         aid[hold+1] = n;
 572                     }
 573                     else
 574                     {
 575                         aid[nra++] = n;
 576                     }
 577                 }
 578             }
 579             /* copy the residuename to the tail of the groupname */
 580             if (nra > 0)
 581             {
 582                 add_grp(gb, gn, nra, aid, "SwapSC-CO");
 583                 nra = 0;
 584             }
 585         }
 586     }
 587     sfree(aid);
 588 }
 589
 590
 591
 592
 593 /* Return 0 if the name was found, otherwise -1.
 594  * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 595  */
 596 int
 597 gmx_residuetype_get_type(gmx_residuetype_t rt, const char * resname, const char ** p_restype)
 598 {
 599     int    i, rc;
 600
 601     rc = -1;
 602     for (i = 0; i < rt->n && rc; i++)
 603     {
 604         rc = gmx_strcasecmp(rt->resname[i], resname);
 605     }
 606
 607     *p_restype = (rc == 0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 608
 609     return rc;
 610 }
 611
 612 int
 613 gmx_residuetype_add(gmx_residuetype_t rt, const char *newresname, const char *newrestype)
 614 {
 615     int           i;
 616     int           found;
 617     const char *  p_oldtype;
 618
 619     found = !gmx_residuetype_get_type(rt, newresname, &p_oldtype);
 620
 621     if (found && gmx_strcasecmp(p_oldtype, newrestype))
 622     {
 623         fprintf(stderr, "Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 624                 newresname, p_oldtype, newrestype);
 625     }
 626
 627     if (found == 0)
 628     {
 629         srenew(rt->resname, rt->n+1);
 630         srenew(rt->restype, rt->n+1);
 631         rt->resname[rt->n] = strdup(newresname);
 632         rt->restype[rt->n] = strdup(newrestype);
 633         rt->n++;
 634     }
 635
 636     return 0;
 637 }
 638
 639
 640 int
 641 gmx_residuetype_init(gmx_residuetype_t *prt)
 642 {
 643     FILE                 *  db;
 644     char                    line[STRLEN];
 645     char                    resname[STRLEN], restype[STRLEN], dum[STRLEN];
 646     char                 *  p;
 647     int                     i;
 648     struct gmx_residuetype *rt;
 649
 650     snew(rt, 1);
 651     *prt = rt;
 652
 653     rt->n        = 0;
 654     rt->resname  = NULL;
 655     rt->restype  = NULL;
 656
 657     db = libopen("residuetypes.dat");
 658
 659     while (get_a_line(db, line, STRLEN))
 660     {
 661         strip_comment(line);
 662         trim(line);
 663         if (line[0] != '\0')
 664         {
 665             if (sscanf(line, "%s %s %s", resname, restype, dum) != 2)
 666             {
 667                 gmx_fatal(FARGS, "Incorrect number of columns (2 expected) for line in residuetypes.dat");
 668             }
 669             gmx_residuetype_add(rt, resname, restype);
 670         }
 671     }
 672
 673     fclose(db);
 674
 675     return 0;
 676 }
 677
 678
 679
 680 int
 681 gmx_residuetype_destroy(gmx_residuetype_t rt)
 682 {
 683     int i;
 684
 685     for (i = 0; i < rt->n; i++)
 686     {
 687         sfree(rt->resname[i]);
 688         sfree(rt->restype[i]);
 689     }
 690     sfree(rt->resname);
 691     sfree(rt->restype);
 692     sfree(rt);
 693
 694     return 0;
 695 }
 696
 697 int
 698 gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 699                              const char ***       p_typenames,
 700                              int *                ntypes)
 701 {
 702     int            i, j, n;
 703     int            found;
 704     const char **  my_typename;
 705     char       *   p;
 706
 707     n = 0;
 708
 709     my_typename = NULL;
 710     for (i = 0; i < rt->n; i++)
 711     {
 712         p     = rt->restype[i];
 713         found = 0;
 714         for (j = 0; j < n && !found; j++)
 715         {
 716             found = !gmx_strcasecmp(p, my_typename[j]);
 717         }
 718
 719         if (!found)
 720         {
 721             srenew(my_typename, n+1);
 722             my_typename[n] = p;
 723             n++;
 724         }
 725     }
 726     *ntypes      = n;
 727     *p_typenames = my_typename;
 728
 729     return 0;
 730 }
 731
 732
 733
 734 gmx_bool
 735 gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 736 {
 737     gmx_bool    rc;
 738     const char *p_type;
 739
 740     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 741         gmx_strcasecmp(p_type, "Protein") == 0)
 742     {
 743         rc = TRUE;
 744     }
 745     else
 746     {
 747         rc = FALSE;
 748     }
 749     return rc;
 750 }
 751
 752 gmx_bool
 753 gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 754 {
 755     gmx_bool    rc;
 756     const char *p_type;
 757
 758     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 759         gmx_strcasecmp(p_type, "DNA") == 0)
 760     {
 761         rc = TRUE;
 762     }
 763     else
 764     {
 765         rc = FALSE;
 766     }
 767     return rc;
 768 }
 769
 770 gmx_bool
 771 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 772 {
 773     gmx_bool    rc;
 774     const char *p_type;
 775
 776     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 777         gmx_strcasecmp(p_type, "RNA") == 0)
 778     {
 779         rc = TRUE;
 780     }
 781     else
 782     {
 783         rc = FALSE;
 784     }
 785     return rc;
 786 }
 787
 788 /* Return the size of the arrays */
 789 int
 790 gmx_residuetype_get_size(gmx_residuetype_t rt)
 791 {
 792     return rt->n;
 793 }
 794
 795 /* Search for a residuetype with name resnm within the
 796  * gmx_residuetype database. Return the index if found,
 797  * otherwise -1.
 798  */
 799 int
 800 gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 801 {
 802     int i, rc;
 803
 804     rc = -1;
 805     for (i = 0; i < rt->n && rc; i++)
 806     {
 807         rc = gmx_strcasecmp(rt->resname[i], resnm);
 808     }
 809
 810     return (0 == rc) ? i-1 : -1;
 811 }
 812
 813 /* Return the name of the residuetype with the given index, or
 814  * NULL if not found. */
 815 const char *
 816 gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 817 {
 818     if (index >= 0 && index < rt->n)
 819     {
 820         return rt->resname[index];
 821     }
 822     else
 823     {
 824         return NULL;
 825     }
 826 }
 827
 828
 829
 830 void analyse(t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 831 {
 832     gmx_residuetype_t rt = NULL;
 833     char             *resnm;
 834     atom_id          *aid;
 835     const char    **  restype;
 836     int               nra;
 837     int               i, k;
 838     size_t            j;
 839     int               ntypes;
 840     char           *  p;
 841     const char     ** p_typename;
 842     int               iwater, iion;
 843     int               nwater, nion;
 844     int               found;
 845
 846     if (bVerb)
 847     {
 848         printf("Analysing residue names:\n");
 849     }
 850     /* Create system group, every single atom */
 851     snew(aid, atoms->nr);
 852     for (i = 0; i < atoms->nr; i++)
 853     {
 854         aid[i] = i;
 855     }
 856     add_grp(gb, gn, atoms->nr, aid, "System");
 857     sfree(aid);
 858
 859     /* For every residue, get a pointer to the residue type name */
 860     gmx_residuetype_init(&rt);
 861     assert(rt);
 862
 863     snew(restype, atoms->nres);
 864     ntypes     = 0;
 865     p_typename = NULL;
 866     for (i = 0; i < atoms->nres; i++)
 867     {
 868         resnm = *atoms->resinfo[i].name;
 869         gmx_residuetype_get_type(rt, resnm, &(restype[i]));
 870
 871         /* Note that this does not lead to a N*N loop, but N*K, where
 872          * K is the number of residue _types_, which is small and independent of N.
 873          */
 874         found = 0;
 875         for (k = 0; k < ntypes && !found; k++)
 876         {
 877             found = !strcmp(restype[i], p_typename[k]);
 878         }
 879         if (!found)
 880         {
 881             srenew(p_typename, ntypes+1);
 882             p_typename[ntypes++] = strdup(restype[i]);
 883         }
 884     }
 885
 886     if (bVerb)
 887     {
 888         p_status(restype, atoms->nres, p_typename, ntypes);
 889     }
 890
 891     for (k = 0; k < ntypes; k++)
 892     {
 893         aid = mk_aid(atoms, restype, p_typename[k], &nra, TRUE);
 894
 895         /* Check for special types to do fancy stuff with */
 896
 897         if (!gmx_strcasecmp(p_typename[k], "Protein") && nra > 0)
 898         {
 899             sfree(aid);
 900             /* PROTEIN */
 901             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 902
 903             /* Create a Non-Protein group */
 904             aid = mk_aid(atoms, restype, "Protein", &nra, FALSE);
 905             if ((nra > 0) && (nra < atoms->nr))
 906             {
 907                 add_grp(gb, gn, nra, aid, "non-Protein");
 908             }
 909             sfree(aid);
 910         }
 911         else if (!gmx_strcasecmp(p_typename[k], "Water") && nra > 0)
 912         {
 913             add_grp(gb, gn, nra, aid, p_typename[k]);
 914             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 915             add_grp(gb, gn, nra, aid, "SOL");
 916
 917             sfree(aid);
 918
 919             /* Solvent, create a negated group too */
 920             aid = mk_aid(atoms, restype, "Water", &nra, FALSE);
 921             if ((nra > 0) && (nra < atoms->nr))
 922             {
 923                 add_grp(gb, gn, nra, aid, "non-Water");
 924             }
 925             sfree(aid);
 926         }
 927         else if (nra > 0)
 928         {
 929             /* Other groups */
 930             add_grp(gb, gn, nra, aid, p_typename[k]);
 931             sfree(aid);
 932             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 933         }
 934     }
 935
 936     sfree(p_typename);
 937     sfree(restype);
 938     gmx_residuetype_destroy(rt);
 939
 940     /* Create a merged water_and_ions group */
 941     iwater = -1;
 942     iion   = -1;
 943     nwater = 0;
 944     nion   = 0;
 945
 946     for (i = 0; i < gb->nr; i++)
 947     {
 948         if (!gmx_strcasecmp((*gn)[i], "Water"))
 949         {
 950             iwater = i;
 951             nwater = gb->index[i+1]-gb->index[i];
 952         }
 953         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 954         {
 955             iion = i;
 956             nion = gb->index[i+1]-gb->index[i];
 957         }
 958     }
 959
 960     if (nwater > 0 && nion > 0)
 961     {
 962         srenew(gb->index, gb->nr+2);
 963         srenew(*gn, gb->nr+1);
 964         (*gn)[gb->nr] = strdup("Water_and_ions");
 965         srenew(gb->a, gb->nra+nwater+nion);
 966         if (nwater > 0)
 967         {
 968             for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
 969             {
 970                 gb->a[gb->nra++] = gb->a[i];
 971             }
 972         }
 973         if (nion > 0)
 974         {
 975             for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
 976             {
 977                 gb->a[gb->nra++] = gb->a[i];
 978             }
 979         }
 980         gb->nr++;
 981         gb->index[gb->nr] = gb->nra;
 982     }
 983 }
 984
 985
 986 void check_index(char *gname, int n, atom_id index[], char *traj, int natoms)
 987 {
 988     int i;
 989
 990     for (i = 0; i < n; i++)
 991     {
 992         if (index[i] >= natoms)
 993         {
 994             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 995                       gname ? gname : "Index", i+1, index[i]+1,
 996                       traj ? traj : "the trajectory", natoms);
 997         }
 998         else if (index[i] < 0)
 999         {
1000             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
1001                       gname ? gname : "Index", i+1, index[i]+1);
1002         }
1003     }
1004 }
1005
1006 t_blocka *init_index(const char *gfile, char ***grpname)
1007 {
1008     FILE      *in;
1009     t_blocka  *b;
1010     int        a, maxentries;
1011     int        i, j, ng, nread;
1012     char       line[STRLEN], *pt, str[STRLEN];
1013
1014     in = gmx_fio_fopen(gfile, "r");
1015     snew(b, 1);
1016     get_a_line(in, line, STRLEN);
1017     if (line[0] == '[')
1018     {
1019         /* new format */
1020         b->nr      = 0;
1021         b->index   = NULL;
1022         b->nra     = 0;
1023         b->a       = NULL;
1024         *grpname   = NULL;
1025         maxentries = 0;
1026         do
1027         {
1028             if (get_header(line, str))
1029             {
1030                 b->nr++;
1031                 srenew(b->index, b->nr+1);
1032                 srenew(*grpname, b->nr);
1033                 if (b->nr == 1)
1034                 {
1035                     b->index[0] = 0;
1036                 }
1037                 b->index[b->nr]     = b->index[b->nr-1];
1038                 (*grpname)[b->nr-1] = strdup(str);
1039             }
1040             else
1041             {
1042                 if (b->nr == 0)
1043                 {
1044                     gmx_fatal(FARGS, "The first header of your indexfile is invalid");
1045                 }
1046                 pt = line;
1047                 while (sscanf(pt, "%s", str) == 1)
1048                 {
1049                     i = b->index[b->nr];
1050                     if (i >= maxentries)
1051                     {
1052                         maxentries += 1024;
1053                         srenew(b->a, maxentries);
1054                     }
1055                     b->a[i] = strtol(str, NULL, 10)-1;
1056                     b->index[b->nr]++;
1057                     (b->nra)++;
1058                     pt = strstr(pt, str)+strlen(str);
1059                 }
1060             }
1061         }
1062         while (get_a_line(in, line, STRLEN));
1063     }
1064     else
1065     {
1066         /* old format */
1067         sscanf(line, "%d%d", &b->nr, &b->nra);
1068         snew(b->index, b->nr+1);
1069         snew(*grpname, b->nr);
1070         b->index[0] = 0;
1071         snew(b->a, b->nra);
1072         for (i = 0; (i < b->nr); i++)
1073         {
1074             nread         = fscanf(in, "%s%d", str, &ng);
1075             (*grpname)[i] = strdup(str);
1076             b->index[i+1] = b->index[i]+ng;
1077             if (b->index[i+1] > b->nra)
1078             {
1079                 gmx_fatal(FARGS, "Something wrong in your indexfile at group %s", str);
1080             }
1081             for (j = 0; (j < ng); j++)
1082             {
1083                 nread               = fscanf(in, "%d", &a);
1084                 b->a[b->index[i]+j] = a;
1085             }
1086         }
1087     }
1088     gmx_fio_fclose(in);
1089
1090     for (i = 0; (i < b->nr); i++)
1091     {
1092         for (j = b->index[i]; (j < b->index[i+1]); j++)
1093         {
1094             if (b->a[j] < 0)
1095             {
1096                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
1097                         b->a[j], (*grpname)[i]);
1098             }
1099         }
1100     }
1101
1102     return b;
1103 }
1104
1105 static void minstring(char *str)
1106 {
1107     int i;
1108
1109     for (i = 0; (i < (int)strlen(str)); i++)
1110     {
1111         if (str[i] == '-')
1112         {
1113             str[i] = '_';
1114         }
1115     }
1116 }
1117
1118 int find_group(char s[], int ngrps, char **grpname)
1119 {
1120     int      aa, i, n;
1121     char     string[STRLEN];
1122     gmx_bool bMultiple;
1123
1124     bMultiple = FALSE;
1125     n         = strlen(s);
1126     aa        = NOTSET;
1127     /* first look for whole name match */
1128     if (aa == NOTSET)
1129     {
1130         for (i = 0; i < ngrps; i++)
1131         {
1132             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
1133             {
1134                 if (aa != NOTSET)
1135                 {
1136                     bMultiple = TRUE;
1137                 }
1138                 aa = i;
1139             }
1140         }
1141     }
1142     /* second look for first string match */
1143     if (aa == NOTSET)
1144     {
1145         for (i = 0; i < ngrps; i++)
1146         {
1147             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
1148             {
1149                 if (aa != NOTSET)
1150                 {
1151                     bMultiple = TRUE;
1152                 }
1153                 aa = i;
1154             }
1155         }
1156     }
1157     /* last look for arbitrary substring match */
1158     if (aa == NOTSET)
1159     {
1160         upstring(s);
1161         minstring(s);
1162         for (i = 0; i < ngrps; i++)
1163         {
1164             strcpy(string, grpname[i]);
1165             upstring(string);
1166             minstring(string);
1167             if (strstr(string, s) != NULL)
1168             {
1169                 if (aa != NOTSET)
1170                 {
1171                     bMultiple = TRUE;
1172                 }
1173                 aa = i;
1174             }
1175         }
1176     }
1177     if (bMultiple)
1178     {
1179         printf("Error: Multiple groups '%s' selected\n", s);
1180         aa = NOTSET;
1181     }
1182     return aa;
1183 }
1184
1185 static int qgroup(int *a, int ngrps, char **grpname)
1186 {
1187     char     s[STRLEN];
1188     int      aa;
1189     gmx_bool bInRange;
1190     char    *end;
1191
1192     do
1193     {
1194         fprintf(stderr, "Select a group: ");
1195         do
1196         {
1197             if (scanf("%s", s) != 1)
1198             {
1199                 gmx_fatal(FARGS, "Cannot read from input");
1200             }
1201             trim(s); /* remove spaces */
1202         }
1203         while (strlen(s) == 0);
1204         aa = strtol(s, &end, 10);
1205         if (aa == 0 && end[0] != '\0') /* string entered */
1206         {
1207             aa = find_group(s, ngrps, grpname);
1208         }
1209         bInRange = (aa >= 0 && aa < ngrps);
1210         if (!bInRange)
1211         {
1212             printf("Error: No such group '%s'\n", s);
1213         }
1214     }
1215     while (!bInRange);
1216     printf("Selected %d: '%s'\n", aa, grpname[aa]);
1217     *a = aa;
1218     return aa;
1219 }
1220
1221 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
1222                       int ngrps, int isize[], atom_id *index[], int grpnr[])
1223 {
1224     int i, j, gnr1;
1225
1226     if (grps->nr == 0)
1227     {
1228         gmx_fatal(FARGS, "Error: no groups in indexfile");
1229     }
1230     for (i = 0; (i < grps->nr); i++)
1231     {
1232         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
1233                 grps->index[i+1]-grps->index[i]);
1234     }
1235     for (i = 0; (i < ngrps); i++)
1236     {
1237         if (grps->nr > 1)
1238         {
1239             do
1240             {
1241                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
1242                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
1243                 {
1244                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
1245                 }
1246             }
1247             while ((gnr1 < 0) || (gnr1 >= grps->nr));
1248         }
1249         else
1250         {
1251             fprintf(stderr, "There is one group in the index\n");
1252             gnr1 = 0;
1253         }
1254         gnames[i] = strdup(grpname[gnr1]);
1255         isize[i]  = grps->index[gnr1+1]-grps->index[gnr1];
1256         snew(index[i], isize[i]);
1257         for (j = 0; (j < isize[i]); j++)
1258         {
1259             index[i][j] = grps->a[grps->index[gnr1]+j];
1260         }
1261     }
1262 }
1263
1264 void rd_index(const char *statfile, int ngrps, int isize[],
1265               atom_id *index[], char *grpnames[])
1266 {
1267     char    **gnames;
1268     t_blocka *grps;
1269     int      *grpnr;
1270
1271     snew(grpnr, ngrps);
1272     if (!statfile)
1273     {
1274         gmx_fatal(FARGS, "No index file specified");
1275     }
1276     grps = init_index(statfile, &gnames);
1277     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1278 }
1279
1280 void rd_index_nrs(char *statfile, int ngrps, int isize[],
1281                   atom_id *index[], char *grpnames[], int grpnr[])
1282 {
1283     char    **gnames;
1284     t_blocka *grps;
1285
1286     if (!statfile)
1287     {
1288         gmx_fatal(FARGS, "No index file specified");
1289     }
1290     grps = init_index(statfile, &gnames);
1291
1292     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1293 }
1294
1295 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1296                int isize[], atom_id *index[], char *grpnames[])
1297 {
1298     char    ***gnames;
1299     t_blocka  *grps = NULL;
1300     int       *grpnr;
1301
1302     snew(grpnr, ngrps);
1303     snew(gnames, 1);
1304     if (fnm != NULL)
1305     {
1306         grps = init_index(fnm, gnames);
1307     }
1308     else if (atoms)
1309     {
1310         snew(grps, 1);
1311         snew(grps->index, 1);
1312         analyse(atoms, grps, gnames, FALSE, FALSE);
1313     }
1314     else
1315     {
1316         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1317     }
1318
1319     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1320 }
1321
1322 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1323 {
1324     t_cluster_ndx *c;
1325     int            i;
1326
1327     snew(c, 1);
1328     c->clust     = init_index(ndx, &c->grpname);
1329     c->maxframe  = -1;
1330     for (i = 0; (i < c->clust->nra); i++)
1331     {
1332         c->maxframe = max(c->maxframe, c->clust->a[i]);
1333     }
1334     fprintf(fplog ? fplog : stdout,
1335             "There are %d clusters containing %d structures, highest framenr is %d\n",
1336             c->clust->nr, c->clust->nra, c->maxframe);
1337     if (debug)
1338     {
1339         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1340         for (i = 0; (i < c->clust->nra); i++)
1341         {
1342             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1343             {
1344                 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1345                           "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1346             }
1347         }
1348     }
1349     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1350
1351     return c;
1352 }