src/gromacs/gmxlib/index.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <ctype.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "sysstuff.h"
  45 #include "macros.h"
  46 #include "names.h"
  47 #include "string2.h"
  48 #include "main.h"
  49 #include "typedefs.h"
  50 #include "smalloc.h"
  51 #include "invblock.h"
  52 #include "macros.h"
  53 #include "index.h"
  54 #include "txtdump.h"
  55
  56 #include "gromacs/fileio/futil.h"
  57 #include "gromacs/fileio/gmxfio.h"
  58 #include "gromacs/fileio/strdb.h"
  59
  60 const char gmx_residuetype_undefined[] = "Other";
  61
  62 struct gmx_residuetype
  63 {
  64     int      n;
  65     char **  resname;
  66     char **  restype;
  67
  68 };
  69
  70
  71 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  72 {
  73     char c;
  74
  75     if (bASK)
  76     {
  77         do
  78         {
  79             c = toupper(fgetc(stdin));
  80         }
  81         while ((c != 'Y') && (c != 'N'));
  82
  83         return (c == 'Y');
  84     }
  85     else
  86     {
  87         return FALSE;
  88     }
  89 }
  90
  91 t_blocka *new_blocka(void)
  92 {
  93     t_blocka *block;
  94
  95     snew(block, 1);
  96     snew(block->index, 1);
  97
  98     return block;
  99 }
 100
 101 void write_index(const char *outf, t_blocka *b, char **gnames, gmx_bool bDuplicate, int natoms)
 102 {
 103     FILE *out;
 104     int   i, j, k;
 105
 106     out = gmx_fio_fopen(outf, "w");
 107     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 108     for (i = 0; (i < b->nr); i++)
 109     {
 110         fprintf(out, "[ %s ]\n", gnames[i]);
 111         for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 112         {
 113             fprintf(out, "%4d ", b->a[j]+1);
 114             if ((k % 15) == 14)
 115             {
 116                 fprintf(out, "\n");
 117             }
 118         }
 119         fprintf(out, "\n");
 120     }
 121
 122     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
 123     if (bDuplicate)
 124     {
 125         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 126         for (i = 0; (i < b->nr); i++)
 127         {
 128             fprintf(out, "[ %s_copy ]\n", gnames[i]);
 129             for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 130             {
 131                 fprintf(out, "%4d ", b->a[j]+1 + natoms );
 132                 if ((k % 15) == 14)
 133                 {
 134                     fprintf(out, "\n");
 135                 }
 136             }
 137             fprintf(out, "\n");
 138         }
 139     }
 140
 141     gmx_fio_fclose(out);
 142 }
 143
 144 void add_grp(t_blocka *b, char ***gnames, int nra, atom_id a[], const char *name)
 145 {
 146     int i;
 147
 148     srenew(b->index, b->nr+2);
 149     srenew(*gnames, b->nr+1);
 150     (*gnames)[b->nr] = strdup(name);
 151
 152     srenew(b->a, b->nra+nra);
 153     for (i = 0; (i < nra); i++)
 154     {
 155         b->a[b->nra++] = a[i];
 156     }
 157     b->nr++;
 158     b->index[b->nr] = b->nra;
 159 }
 160
 161 /* compare index in `a' with group in `b' at `index',
 162    when `index'<0 it is relative to end of `b' */
 163 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 164 {
 165     int i;
 166
 167     if (index < 0)
 168     {
 169         index = b->nr-1+index;
 170     }
 171     if (index >= b->nr)
 172     {
 173         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 174     }
 175     /* compare sizes */
 176     if (nra != b->index[index+1] - b->index[index])
 177     {
 178         return FALSE;
 179     }
 180     for (i = 0; i < nra; i++)
 181     {
 182         if (a[i] != b->a[b->index[index]+i])
 183         {
 184             return FALSE;
 185         }
 186     }
 187     return TRUE;
 188 }
 189
 190 static void
 191 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 192 {
 193     int   i, j;
 194     int   found;
 195
 196     int * counter;
 197
 198     snew(counter, ntypes);
 199     for (i = 0; i < ntypes; i++)
 200     {
 201         counter[i] = 0;
 202     }
 203     for (i = 0; i < nres; i++)
 204     {
 205         found = 0;
 206         for (j = 0; j < ntypes; j++)
 207         {
 208             if (!gmx_strcasecmp(restype[i], typenames[j]))
 209             {
 210                 counter[j]++;
 211             }
 212         }
 213     }
 214
 215     for (i = 0; (i < ntypes); i++)
 216     {
 217         if (counter[i] > 0)
 218         {
 219             printf("There are: %5d %10s residues\n", counter[i], typenames[i]);
 220         }
 221     }
 222
 223     sfree(counter);
 224 }
 225
 226
 227 atom_id *
 228 mk_aid(t_atoms *atoms, const char ** restype, const char * typestring, int *nra, gmx_bool bMatch)
 229 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 230 {
 231     atom_id *a;
 232     int      i;
 233     int      res;
 234
 235     snew(a, atoms->nr);
 236     *nra = 0;
 237     for (i = 0; (i < atoms->nr); i++)
 238     {
 239         res = !gmx_strcasecmp(restype[atoms->atom[i].resind], typestring);
 240         if (bMatch == FALSE)
 241         {
 242             res = !res;
 243         }
 244         if (res)
 245         {
 246             a[(*nra)++] = i;
 247         }
 248     }
 249
 250     return a;
 251 }
 252
 253 typedef struct {
 254     char    *rname;
 255     gmx_bool bNeg;
 256     char    *gname;
 257 } restp_t;
 258
 259 static void analyse_other(const char ** restype, t_atoms *atoms,
 260                           t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 261 {
 262     restp_t *restp = NULL;
 263     char   **attp  = NULL;
 264     char    *rname, *aname;
 265     atom_id *other_ndx, *aid, *aaid;
 266     int      i, j, k, l, resind, naid, naaid, natp, nrestp = 0;
 267
 268     for (i = 0; (i < atoms->nres); i++)
 269     {
 270         if (gmx_strcasecmp(restype[i], "Protein") && gmx_strcasecmp(restype[i], "DNA") && gmx_strcasecmp(restype[i], "RNA") && gmx_strcasecmp(restype[i], "Water"))
 271         {
 272             break;
 273         }
 274     }
 275     if (i < atoms->nres)
 276     {
 277         /* we have others */
 278         if (bVerb)
 279         {
 280             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 281         }
 282         snew(other_ndx, atoms->nr);
 283         for (k = 0; (k < atoms->nr); k++)
 284         {
 285             resind = atoms->atom[k].resind;
 286             rname  = *atoms->resinfo[resind].name;
 287             if (gmx_strcasecmp(restype[resind], "Protein") && gmx_strcasecmp(restype[resind], "DNA") &&
 288                 gmx_strcasecmp(restype[resind], "RNA") && gmx_strcasecmp(restype[resind], "Water"))
 289             {
 290
 291                 for (l = 0; (l < nrestp); l++)
 292                 {
 293                     if (strcmp(restp[l].rname, rname) == 0)
 294                     {
 295                         break;
 296                     }
 297                 }
 298                 if (l == nrestp)
 299                 {
 300                     srenew(restp, nrestp+1);
 301                     restp[nrestp].rname = strdup(rname);
 302                     restp[nrestp].bNeg  = FALSE;
 303                     restp[nrestp].gname = strdup(rname);
 304                     nrestp++;
 305
 306                 }
 307             }
 308         }
 309         for (i = 0; (i < nrestp); i++)
 310         {
 311             snew(aid, atoms->nr);
 312             naid = 0;
 313             for (j = 0; (j < atoms->nr); j++)
 314             {
 315                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 316                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
 317                     (strcmp(restp[i].rname, rname) != 0 &&  restp[i].bNeg))
 318                 {
 319                     aid[naid++] = j;
 320                 }
 321             }
 322             add_grp(gb, gn, naid, aid, restp[i].gname);
 323             if (bASK)
 324             {
 325                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 326                 fflush(stdout);
 327                 if (gmx_ask_yesno(bASK))
 328                 {
 329                     natp = 0;
 330                     for (k = 0; (k < naid); k++)
 331                     {
 332                         aname = *atoms->atomname[aid[k]];
 333                         for (l = 0; (l < natp); l++)
 334                         {
 335                             if (strcmp(aname, attp[l]) == 0)
 336                             {
 337                                 break;
 338                             }
 339                         }
 340                         if (l == natp)
 341                         {
 342                             srenew(attp, ++natp);
 343                             attp[natp-1] = aname;
 344                         }
 345                     }
 346                     if (natp > 1)
 347                     {
 348                         for (l = 0; (l < natp); l++)
 349                         {
 350                             snew(aaid, naid);
 351                             naaid = 0;
 352                             for (k = 0; (k < naid); k++)
 353                             {
 354                                 aname = *atoms->atomname[aid[k]];
 355                                 if (strcmp(aname, attp[l]) == 0)
 356                                 {
 357                                     aaid[naaid++] = aid[k];
 358                                 }
 359                             }
 360                             add_grp(gb, gn, naaid, aaid, attp[l]);
 361                             sfree(aaid);
 362                         }
 363                     }
 364                     sfree(attp);
 365                     attp = NULL;
 366                 }
 367                 sfree(aid);
 368             }
 369         }
 370         sfree(other_ndx);
 371     }
 372 }
 373
 374 /*! /brief Instances of this struct contain the data necessary to
 375  *         construct a single (protein) index group in
 376  *         analyse_prot(). */
 377 typedef struct gmx_help_make_index_group
 378 {
 379     /** The set of atom names that will be used to form this index group */
 380     const char **defining_atomnames;
 381     /** Size of the defining_atomnames array */
 382     const int    num_defining_atomnames;
 383     /** Name of this index group */
 384     const char  *group_name;
 385     /** Whether the above atom names name the atoms in the group, or
 386         those not in the group */
 387     gmx_bool bTakeComplement;
 388     /** The index in wholename gives the first item in the arrays of
 389        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 390        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 391        This is comparable to using a '*' wildcard at the end of specific
 392        atom names, but that is more involved to implement...
 393      */
 394     int wholename;
 395     /** Only create this index group if it differs from the one specified in compareto,
 396        where -1 means to always create this group. */
 397     int compareto;
 398 } t_gmx_help_make_index_group;
 399
 400 static void analyse_prot(const char ** restype, t_atoms *atoms,
 401                          t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 402 {
 403     /* lists of atomnames to be used in constructing index groups: */
 404     static const char *pnoh[]    = { "H", "HN" };
 405     static const char *pnodum[]  = {
 406         "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 407         "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
 408     };
 409     static const char *calpha[]  = { "CA" };
 410     static const char *bb[]      = { "N", "CA", "C" };
 411     static const char *mc[]      = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 412     static const char *mcb[]     = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 413     static const char *mch[]     = {
 414         "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
 415         "H1", "H2", "H3", "H", "HN"
 416     };
 417
 418     static const t_gmx_help_make_index_group constructing_data[] =
 419     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 420      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 421      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 422      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 423      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 424      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 425      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 426      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 427      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 428      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0}, };
 429     const int   num_index_groups = asize(constructing_data);
 430
 431     int         n, j;
 432     atom_id    *aid;
 433     int         nra, nnpres, npres;
 434     gmx_bool    match;
 435     char        ndx_name[STRLEN], *atnm;
 436     int         i;
 437
 438     if (bVerb)
 439     {
 440         printf("Analysing Protein...\n");
 441     }
 442     snew(aid, atoms->nr);
 443
 444     /* calculate the number of protein residues */
 445     npres = 0;
 446     for (i = 0; (i < atoms->nres); i++)
 447     {
 448         if (0 == gmx_strcasecmp(restype[i], "Protein"))
 449         {
 450             npres++;
 451         }
 452     }
 453     /* find matching or complement atoms */
 454     for (i = 0; (i < (int)num_index_groups); i++)
 455     {
 456         nra = 0;
 457         for (n = 0; (n < atoms->nr); n++)
 458         {
 459             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind], "Protein"))
 460             {
 461                 match = FALSE;
 462                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 463                 {
 464                     /* skip digits at beginning of atomname, e.g. 1H */
 465                     atnm = *atoms->atomname[n];
 466                     while (isdigit(atnm[0]))
 467                     {
 468                         atnm++;
 469                     }
 470                     if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
 471                     {
 472                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 473                         {
 474                             match = TRUE;
 475                         }
 476                     }
 477                     else
 478                     {
 479                         if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
 480                         {
 481                             match = TRUE;
 482                         }
 483                     }
 484                 }
 485                 if (constructing_data[i].bTakeComplement != match)
 486                 {
 487                     aid[nra++] = n;
 488                 }
 489             }
 490         }
 491         /* if we want to add this group always or it differs from previous
 492            group, add it: */
 493         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, nra, aid, constructing_data[i].compareto-i) )
 494         {
 495             add_grp(gb, gn, nra, aid, constructing_data[i].group_name);
 496         }
 497     }
 498
 499     if (bASK)
 500     {
 501         for (i = 0; (i < (int)num_index_groups); i++)
 502         {
 503             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 504             if (gmx_ask_yesno(bASK))
 505             {
 506                 int resind;
 507                 nra = 0;
 508                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 509                 {
 510                     resind = atoms->atom[n].resind;
 511                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 512                     {
 513                         match = FALSE;
 514                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 515                         {
 516                             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
 517                             {
 518                                 match = TRUE;
 519                             }
 520                         }
 521                         if (constructing_data[i].bTakeComplement != match)
 522                         {
 523                             aid[nra++] = n;
 524                         }
 525                     }
 526                     /* copy the residuename to the tail of the groupname */
 527                     if (nra > 0)
 528                     {
 529                         t_resinfo *ri;
 530                         ri = &atoms->resinfo[resind];
 531                         sprintf(ndx_name, "%s_%s%d%c",
 532                                 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 533                         add_grp(gb, gn, nra, aid, ndx_name);
 534                         nra = 0;
 535                     }
 536                 }
 537             }
 538         }
 539         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 540         if (gmx_ask_yesno(bASK))
 541         {
 542             /* Make swap sidechain C=O index */
 543             int resind, hold;
 544             nra = 0;
 545             for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 546             {
 547                 resind = atoms->atom[n].resind;
 548                 hold   = -1;
 549                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 550                 {
 551                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 552                     {
 553                         aid[nra++] = n;
 554                         hold       = nra;
 555                         nra       += 2;
 556                     }
 557                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 558                     {
 559                         if (hold == -1)
 560                         {
 561                             gmx_incons("Atom naming problem");
 562                         }
 563                         aid[hold] = n;
 564                     }
 565                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 566                     {
 567                         if (hold == -1)
 568                         {
 569                             gmx_incons("Atom naming problem");
 570                         }
 571                         aid[hold+1] = n;
 572                     }
 573                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 574                     {
 575                         if (hold == -1)
 576                         {
 577                             gmx_incons("Atom naming problem");
 578                         }
 579                         aid[hold+1] = n;
 580                     }
 581                     else
 582                     {
 583                         aid[nra++] = n;
 584                     }
 585                 }
 586             }
 587             /* copy the residuename to the tail of the groupname */
 588             if (nra > 0)
 589             {
 590                 add_grp(gb, gn, nra, aid, "SwapSC-CO");
 591                 nra = 0;
 592             }
 593         }
 594     }
 595     sfree(aid);
 596 }
 597
 598
 599
 600
 601 /* Return 0 if the name was found, otherwise -1.
 602  * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 603  */
 604 int
 605 gmx_residuetype_get_type(gmx_residuetype_t rt, const char * resname, const char ** p_restype)
 606 {
 607     int    i, rc;
 608
 609     rc = -1;
 610     for (i = 0; i < rt->n && rc; i++)
 611     {
 612         rc = gmx_strcasecmp(rt->resname[i], resname);
 613     }
 614
 615     *p_restype = (rc == 0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 616
 617     return rc;
 618 }
 619
 620 int
 621 gmx_residuetype_add(gmx_residuetype_t rt, const char *newresname, const char *newrestype)
 622 {
 623     int           i;
 624     int           found;
 625     const char *  p_oldtype;
 626
 627     found = !gmx_residuetype_get_type(rt, newresname, &p_oldtype);
 628
 629     if (found && gmx_strcasecmp(p_oldtype, newrestype))
 630     {
 631         fprintf(stderr, "Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 632                 newresname, p_oldtype, newrestype);
 633     }
 634
 635     if (found == 0)
 636     {
 637         srenew(rt->resname, rt->n+1);
 638         srenew(rt->restype, rt->n+1);
 639         rt->resname[rt->n] = strdup(newresname);
 640         rt->restype[rt->n] = strdup(newrestype);
 641         rt->n++;
 642     }
 643
 644     return 0;
 645 }
 646
 647
 648 int
 649 gmx_residuetype_init(gmx_residuetype_t *prt)
 650 {
 651     FILE                 *  db;
 652     char                    line[STRLEN];
 653     char                    resname[STRLEN], restype[STRLEN], dum[STRLEN];
 654     char                 *  p;
 655     int                     i;
 656     struct gmx_residuetype *rt;
 657
 658     snew(rt, 1);
 659     *prt = rt;
 660
 661     rt->n        = 0;
 662     rt->resname  = NULL;
 663     rt->restype  = NULL;
 664
 665     db = libopen("residuetypes.dat");
 666
 667     while (get_a_line(db, line, STRLEN))
 668     {
 669         strip_comment(line);
 670         trim(line);
 671         if (line[0] != '\0')
 672         {
 673             if (sscanf(line, "%s %s %s", resname, restype, dum) != 2)
 674             {
 675                 gmx_fatal(FARGS, "Incorrect number of columns (2 expected) for line in residuetypes.dat");
 676             }
 677             gmx_residuetype_add(rt, resname, restype);
 678         }
 679     }
 680
 681     fclose(db);
 682
 683     return 0;
 684 }
 685
 686
 687
 688 int
 689 gmx_residuetype_destroy(gmx_residuetype_t rt)
 690 {
 691     int i;
 692
 693     for (i = 0; i < rt->n; i++)
 694     {
 695         sfree(rt->resname[i]);
 696         sfree(rt->restype[i]);
 697     }
 698     sfree(rt->resname);
 699     sfree(rt->restype);
 700     sfree(rt);
 701
 702     return 0;
 703 }
 704
 705 int
 706 gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 707                              const char ***       p_typenames,
 708                              int *                ntypes)
 709 {
 710     int            i, j, n;
 711     int            found;
 712     const char **  my_typename;
 713     char       *   p;
 714
 715     n = 0;
 716
 717     my_typename = NULL;
 718     for (i = 0; i < rt->n; i++)
 719     {
 720         p     = rt->restype[i];
 721         found = 0;
 722         for (j = 0; j < n && !found; j++)
 723         {
 724             found = !gmx_strcasecmp(p, my_typename[j]);
 725         }
 726
 727         if (!found)
 728         {
 729             srenew(my_typename, n+1);
 730             my_typename[n] = p;
 731             n++;
 732         }
 733     }
 734     *ntypes      = n;
 735     *p_typenames = my_typename;
 736
 737     return 0;
 738 }
 739
 740
 741
 742 gmx_bool
 743 gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 744 {
 745     gmx_bool    rc;
 746     const char *p_type;
 747
 748     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 749         gmx_strcasecmp(p_type, "Protein") == 0)
 750     {
 751         rc = TRUE;
 752     }
 753     else
 754     {
 755         rc = FALSE;
 756     }
 757     return rc;
 758 }
 759
 760 gmx_bool
 761 gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 762 {
 763     gmx_bool    rc;
 764     const char *p_type;
 765
 766     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 767         gmx_strcasecmp(p_type, "DNA") == 0)
 768     {
 769         rc = TRUE;
 770     }
 771     else
 772     {
 773         rc = FALSE;
 774     }
 775     return rc;
 776 }
 777
 778 gmx_bool
 779 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 780 {
 781     gmx_bool    rc;
 782     const char *p_type;
 783
 784     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 785         gmx_strcasecmp(p_type, "RNA") == 0)
 786     {
 787         rc = TRUE;
 788     }
 789     else
 790     {
 791         rc = FALSE;
 792     }
 793     return rc;
 794 }
 795
 796 /* Return the size of the arrays */
 797 int
 798 gmx_residuetype_get_size(gmx_residuetype_t rt)
 799 {
 800     return rt->n;
 801 }
 802
 803 /* Search for a residuetype with name resnm within the
 804  * gmx_residuetype database. Return the index if found,
 805  * otherwise -1.
 806  */
 807 int
 808 gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 809 {
 810     int i, rc;
 811
 812     rc = -1;
 813     for (i = 0; i < rt->n && rc; i++)
 814     {
 815         rc = gmx_strcasecmp(rt->resname[i], resnm);
 816     }
 817
 818     return (0 == rc) ? i-1 : -1;
 819 }
 820
 821 /* Return the name of the residuetype with the given index, or
 822  * NULL if not found. */
 823 const char *
 824 gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 825 {
 826     if (index >= 0 && index < rt->n)
 827     {
 828         return rt->resname[index];
 829     }
 830     else
 831     {
 832         return NULL;
 833     }
 834 }
 835
 836
 837
 838 void analyse(t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 839 {
 840     gmx_residuetype_t rt = NULL;
 841     char             *resnm;
 842     atom_id          *aid;
 843     const char    **  restype;
 844     int               nra;
 845     int               i, k;
 846     size_t            j;
 847     int               ntypes;
 848     char           *  p;
 849     const char     ** p_typename;
 850     int               iwater, iion;
 851     int               nwater, nion;
 852     int               found;
 853
 854     if (bVerb)
 855     {
 856         printf("Analysing residue names:\n");
 857     }
 858     /* Create system group, every single atom */
 859     snew(aid, atoms->nr);
 860     for (i = 0; i < atoms->nr; i++)
 861     {
 862         aid[i] = i;
 863     }
 864     add_grp(gb, gn, atoms->nr, aid, "System");
 865     sfree(aid);
 866
 867     /* For every residue, get a pointer to the residue type name */
 868     gmx_residuetype_init(&rt);
 869     assert(rt);
 870
 871     snew(restype, atoms->nres);
 872     ntypes     = 0;
 873     p_typename = NULL;
 874     for (i = 0; i < atoms->nres; i++)
 875     {
 876         resnm = *atoms->resinfo[i].name;
 877         gmx_residuetype_get_type(rt, resnm, &(restype[i]));
 878
 879         /* Note that this does not lead to a N*N loop, but N*K, where
 880          * K is the number of residue _types_, which is small and independent of N.
 881          */
 882         found = 0;
 883         for (k = 0; k < ntypes && !found; k++)
 884         {
 885             found = !strcmp(restype[i], p_typename[k]);
 886         }
 887         if (!found)
 888         {
 889             srenew(p_typename, ntypes+1);
 890             p_typename[ntypes++] = strdup(restype[i]);
 891         }
 892     }
 893
 894     if (bVerb)
 895     {
 896         p_status(restype, atoms->nres, p_typename, ntypes);
 897     }
 898
 899     for (k = 0; k < ntypes; k++)
 900     {
 901         aid = mk_aid(atoms, restype, p_typename[k], &nra, TRUE);
 902
 903         /* Check for special types to do fancy stuff with */
 904
 905         if (!gmx_strcasecmp(p_typename[k], "Protein") && nra > 0)
 906         {
 907             sfree(aid);
 908             /* PROTEIN */
 909             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 910
 911             /* Create a Non-Protein group */
 912             aid = mk_aid(atoms, restype, "Protein", &nra, FALSE);
 913             if ((nra > 0) && (nra < atoms->nr))
 914             {
 915                 add_grp(gb, gn, nra, aid, "non-Protein");
 916             }
 917             sfree(aid);
 918         }
 919         else if (!gmx_strcasecmp(p_typename[k], "Water") && nra > 0)
 920         {
 921             add_grp(gb, gn, nra, aid, p_typename[k]);
 922             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 923             add_grp(gb, gn, nra, aid, "SOL");
 924
 925             sfree(aid);
 926
 927             /* Solvent, create a negated group too */
 928             aid = mk_aid(atoms, restype, "Water", &nra, FALSE);
 929             if ((nra > 0) && (nra < atoms->nr))
 930             {
 931                 add_grp(gb, gn, nra, aid, "non-Water");
 932             }
 933             sfree(aid);
 934         }
 935         else if (nra > 0)
 936         {
 937             /* Other groups */
 938             add_grp(gb, gn, nra, aid, p_typename[k]);
 939             sfree(aid);
 940             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 941         }
 942     }
 943
 944     sfree(p_typename);
 945     sfree(restype);
 946     gmx_residuetype_destroy(rt);
 947
 948     /* Create a merged water_and_ions group */
 949     iwater = -1;
 950     iion   = -1;
 951     nwater = 0;
 952     nion   = 0;
 953
 954     for (i = 0; i < gb->nr; i++)
 955     {
 956         if (!gmx_strcasecmp((*gn)[i], "Water"))
 957         {
 958             iwater = i;
 959             nwater = gb->index[i+1]-gb->index[i];
 960         }
 961         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 962         {
 963             iion = i;
 964             nion = gb->index[i+1]-gb->index[i];
 965         }
 966     }
 967
 968     if (nwater > 0 && nion > 0)
 969     {
 970         srenew(gb->index, gb->nr+2);
 971         srenew(*gn, gb->nr+1);
 972         (*gn)[gb->nr] = strdup("Water_and_ions");
 973         srenew(gb->a, gb->nra+nwater+nion);
 974         if (nwater > 0)
 975         {
 976             for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
 977             {
 978                 gb->a[gb->nra++] = gb->a[i];
 979             }
 980         }
 981         if (nion > 0)
 982         {
 983             for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
 984             {
 985                 gb->a[gb->nra++] = gb->a[i];
 986             }
 987         }
 988         gb->nr++;
 989         gb->index[gb->nr] = gb->nra;
 990     }
 991 }
 992
 993
 994 void check_index(char *gname, int n, atom_id index[], char *traj, int natoms)
 995 {
 996     int i;
 997
 998     for (i = 0; i < n; i++)
 999     {
1000         if (index[i] >= natoms)
1001         {
1002             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
1003                       gname ? gname : "Index", i+1, index[i]+1,
1004                       traj ? traj : "the trajectory", natoms);
1005         }
1006         else if (index[i] < 0)
1007         {
1008             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
1009                       gname ? gname : "Index", i+1, index[i]+1);
1010         }
1011     }
1012 }
1013
1014 t_blocka *init_index(const char *gfile, char ***grpname)
1015 {
1016     FILE      *in;
1017     t_blocka  *b;
1018     int        a, maxentries;
1019     int        i, j, ng, nread;
1020     char       line[STRLEN], *pt, str[STRLEN];
1021
1022     in = gmx_fio_fopen(gfile, "r");
1023     snew(b, 1);
1024     get_a_line(in, line, STRLEN);
1025     if (line[0] == '[')
1026     {
1027         /* new format */
1028         b->nr      = 0;
1029         b->index   = NULL;
1030         b->nra     = 0;
1031         b->a       = NULL;
1032         *grpname   = NULL;
1033         maxentries = 0;
1034         do
1035         {
1036             if (get_header(line, str))
1037             {
1038                 b->nr++;
1039                 srenew(b->index, b->nr+1);
1040                 srenew(*grpname, b->nr);
1041                 if (b->nr == 1)
1042                 {
1043                     b->index[0] = 0;
1044                 }
1045                 b->index[b->nr]     = b->index[b->nr-1];
1046                 (*grpname)[b->nr-1] = strdup(str);
1047             }
1048             else
1049             {
1050                 if (b->nr == 0)
1051                 {
1052                     gmx_fatal(FARGS, "The first header of your indexfile is invalid");
1053                 }
1054                 pt = line;
1055                 while (sscanf(pt, "%s", str) == 1)
1056                 {
1057                     i = b->index[b->nr];
1058                     if (i >= maxentries)
1059                     {
1060                         maxentries += 1024;
1061                         srenew(b->a, maxentries);
1062                     }
1063                     b->a[i] = strtol(str, NULL, 10)-1;
1064                     b->index[b->nr]++;
1065                     (b->nra)++;
1066                     pt = strstr(pt, str)+strlen(str);
1067                 }
1068             }
1069         }
1070         while (get_a_line(in, line, STRLEN));
1071     }
1072     else
1073     {
1074         /* old format */
1075         sscanf(line, "%d%d", &b->nr, &b->nra);
1076         snew(b->index, b->nr+1);
1077         snew(*grpname, b->nr);
1078         b->index[0] = 0;
1079         snew(b->a, b->nra);
1080         for (i = 0; (i < b->nr); i++)
1081         {
1082             nread         = fscanf(in, "%s%d", str, &ng);
1083             (*grpname)[i] = strdup(str);
1084             b->index[i+1] = b->index[i]+ng;
1085             if (b->index[i+1] > b->nra)
1086             {
1087                 gmx_fatal(FARGS, "Something wrong in your indexfile at group %s", str);
1088             }
1089             for (j = 0; (j < ng); j++)
1090             {
1091                 nread               = fscanf(in, "%d", &a);
1092                 b->a[b->index[i]+j] = a;
1093             }
1094         }
1095     }
1096     gmx_fio_fclose(in);
1097
1098     for (i = 0; (i < b->nr); i++)
1099     {
1100         for (j = b->index[i]; (j < b->index[i+1]); j++)
1101         {
1102             if (b->a[j] < 0)
1103             {
1104                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
1105                         b->a[j], (*grpname)[i]);
1106             }
1107         }
1108     }
1109
1110     return b;
1111 }
1112
1113 static void minstring(char *str)
1114 {
1115     int i;
1116
1117     for (i = 0; (i < (int)strlen(str)); i++)
1118     {
1119         if (str[i] == '-')
1120         {
1121             str[i] = '_';
1122         }
1123     }
1124 }
1125
1126 int find_group(char s[], int ngrps, char **grpname)
1127 {
1128     int      aa, i, n;
1129     char     string[STRLEN];
1130     gmx_bool bMultiple;
1131
1132     bMultiple = FALSE;
1133     n         = strlen(s);
1134     aa        = NOTSET;
1135     /* first look for whole name match */
1136     if (aa == NOTSET)
1137     {
1138         for (i = 0; i < ngrps; i++)
1139         {
1140             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
1141             {
1142                 if (aa != NOTSET)
1143                 {
1144                     bMultiple = TRUE;
1145                 }
1146                 aa = i;
1147             }
1148         }
1149     }
1150     /* second look for first string match */
1151     if (aa == NOTSET)
1152     {
1153         for (i = 0; i < ngrps; i++)
1154         {
1155             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
1156             {
1157                 if (aa != NOTSET)
1158                 {
1159                     bMultiple = TRUE;
1160                 }
1161                 aa = i;
1162             }
1163         }
1164     }
1165     /* last look for arbitrary substring match */
1166     if (aa == NOTSET)
1167     {
1168         upstring(s);
1169         minstring(s);
1170         for (i = 0; i < ngrps; i++)
1171         {
1172             strcpy(string, grpname[i]);
1173             upstring(string);
1174             minstring(string);
1175             if (strstr(string, s) != NULL)
1176             {
1177                 if (aa != NOTSET)
1178                 {
1179                     bMultiple = TRUE;
1180                 }
1181                 aa = i;
1182             }
1183         }
1184     }
1185     if (bMultiple)
1186     {
1187         printf("Error: Multiple groups '%s' selected\n", s);
1188         aa = NOTSET;
1189     }
1190     return aa;
1191 }
1192
1193 static int qgroup(int *a, int ngrps, char **grpname)
1194 {
1195     char     s[STRLEN];
1196     int      aa;
1197     gmx_bool bInRange;
1198     char    *end;
1199
1200     do
1201     {
1202         fprintf(stderr, "Select a group: ");
1203         do
1204         {
1205             if (scanf("%s", s) != 1)
1206             {
1207                 gmx_fatal(FARGS, "Cannot read from input");
1208             }
1209             trim(s); /* remove spaces */
1210         }
1211         while (strlen(s) == 0);
1212         aa = strtol(s, &end, 10);
1213         if (aa == 0 && end[0] != '\0') /* string entered */
1214         {
1215             aa = find_group(s, ngrps, grpname);
1216         }
1217         bInRange = (aa >= 0 && aa < ngrps);
1218         if (!bInRange)
1219         {
1220             printf("Error: No such group '%s'\n", s);
1221         }
1222     }
1223     while (!bInRange);
1224     printf("Selected %d: '%s'\n", aa, grpname[aa]);
1225     *a = aa;
1226     return aa;
1227 }
1228
1229 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
1230                       int ngrps, int isize[], atom_id *index[], int grpnr[])
1231 {
1232     int i, j, gnr1;
1233
1234     if (grps->nr == 0)
1235     {
1236         gmx_fatal(FARGS, "Error: no groups in indexfile");
1237     }
1238     for (i = 0; (i < grps->nr); i++)
1239     {
1240         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
1241                 grps->index[i+1]-grps->index[i]);
1242     }
1243     for (i = 0; (i < ngrps); i++)
1244     {
1245         if (grps->nr > 1)
1246         {
1247             do
1248             {
1249                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
1250                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
1251                 {
1252                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
1253                 }
1254             }
1255             while ((gnr1 < 0) || (gnr1 >= grps->nr));
1256         }
1257         else
1258         {
1259             fprintf(stderr, "There is one group in the index\n");
1260             gnr1 = 0;
1261         }
1262         gnames[i] = strdup(grpname[gnr1]);
1263         isize[i]  = grps->index[gnr1+1]-grps->index[gnr1];
1264         snew(index[i], isize[i]);
1265         for (j = 0; (j < isize[i]); j++)
1266         {
1267             index[i][j] = grps->a[grps->index[gnr1]+j];
1268         }
1269     }
1270 }
1271
1272 void rd_index(const char *statfile, int ngrps, int isize[],
1273               atom_id *index[], char *grpnames[])
1274 {
1275     char    **gnames;
1276     t_blocka *grps;
1277     int      *grpnr;
1278
1279     snew(grpnr, ngrps);
1280     if (!statfile)
1281     {
1282         gmx_fatal(FARGS, "No index file specified");
1283     }
1284     grps = init_index(statfile, &gnames);
1285     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1286 }
1287
1288 void rd_index_nrs(char *statfile, int ngrps, int isize[],
1289                   atom_id *index[], char *grpnames[], int grpnr[])
1290 {
1291     char    **gnames;
1292     t_blocka *grps;
1293
1294     if (!statfile)
1295     {
1296         gmx_fatal(FARGS, "No index file specified");
1297     }
1298     grps = init_index(statfile, &gnames);
1299
1300     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1301 }
1302
1303 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1304                int isize[], atom_id *index[], char *grpnames[])
1305 {
1306     char    ***gnames;
1307     t_blocka  *grps = NULL;
1308     int       *grpnr;
1309
1310     snew(grpnr, ngrps);
1311     snew(gnames, 1);
1312     if (fnm != NULL)
1313     {
1314         grps = init_index(fnm, gnames);
1315     }
1316     else if (atoms)
1317     {
1318         snew(grps, 1);
1319         snew(grps->index, 1);
1320         analyse(atoms, grps, gnames, FALSE, FALSE);
1321     }
1322     else
1323     {
1324         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1325     }
1326
1327     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1328 }
1329
1330 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1331 {
1332     t_cluster_ndx *c;
1333     int            i;
1334
1335     snew(c, 1);
1336     c->clust     = init_index(ndx, &c->grpname);
1337     c->maxframe  = -1;
1338     for (i = 0; (i < c->clust->nra); i++)
1339     {
1340         c->maxframe = max(c->maxframe, c->clust->a[i]);
1341     }
1342     fprintf(fplog ? fplog : stdout,
1343             "There are %d clusters containing %d structures, highest framenr is %d\n",
1344             c->clust->nr, c->clust->nra, c->maxframe);
1345     if (debug)
1346     {
1347         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1348         for (i = 0; (i < c->clust->nra); i++)
1349         {
1350             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1351             {
1352                 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1353                           "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1354             }
1355         }
1356     }
1357     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1358
1359     return c;
1360 }