src/gromacs/gmxlib/index.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <ctype.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "sysstuff.h"
  45 #include "macros.h"
  46 #include "names.h"
  47 #include "string2.h"
  48 #include "main.h"
  49 #include "typedefs.h"
  50 #include "smalloc.h"
  51 #include "invblock.h"
  52 #include "macros.h"
  53 #include "index.h"
  54 #include "txtdump.h"
  55
  56 #include "gromacs/fileio/futil.h"
  57 #include "gromacs/fileio/gmxfio.h"
  58 #include "gromacs/fileio/strdb.h"
  59
  60 const char gmx_residuetype_undefined[] = "Other";
  61
  62 struct gmx_residuetype
  63 {
  64     int      n;
  65     char **  resname;
  66     char **  restype;
  67
  68 };
  69
  70
  71 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  72 {
  73     char c;
  74
  75     if (bASK)
  76     {
  77         do
  78         {
  79             c = toupper(fgetc(stdin));
  80         }
  81         while ((c != 'Y') && (c != 'N'));
  82
  83         return (c == 'Y');
  84     }
  85     else
  86     {
  87         return FALSE;
  88     }
  89 }
  90
  91 t_blocka *new_blocka(void)
  92 {
  93     t_blocka *block;
  94
  95     snew(block, 1);
  96     snew(block->index, 1);
  97
  98     return block;
  99 }
 100
 101 void write_index(const char *outf, t_blocka *b, char **gnames, gmx_bool bDuplicate, int natoms)
 102 {
 103     FILE *out;
 104     int   i, j, k;
 105
 106     out = gmx_fio_fopen(outf, "w");
 107     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 108     for (i = 0; (i < b->nr); i++)
 109     {
 110         fprintf(out, "[ %s ]\n", gnames[i]);
 111         for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 112         {
 113             fprintf(out, "%4d ", b->a[j]+1);
 114             if ((k % 15) == 14)
 115             {
 116                 fprintf(out, "\n");
 117             }
 118         }
 119         fprintf(out, "\n");
 120     }
 121
 122     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
 123     if (bDuplicate)
 124     {
 125         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 126         for (i = 0; (i < b->nr); i++)
 127         {
 128             fprintf(out, "[ %s_copy ]\n", gnames[i]);
 129             for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 130             {
 131                 fprintf(out, "%4d ", b->a[j]+1 + natoms );
 132                 if ((k % 15) == 14)
 133                 {
 134                     fprintf(out, "\n");
 135                 }
 136             }
 137             fprintf(out, "\n");
 138         }
 139     }
 140
 141     gmx_fio_fclose(out);
 142 }
 143
 144 void add_grp(t_blocka *b, char ***gnames, int nra, atom_id a[], const char *name)
 145 {
 146     int i;
 147
 148     srenew(b->index, b->nr+2);
 149     srenew(*gnames, b->nr+1);
 150     (*gnames)[b->nr] = strdup(name);
 151
 152     srenew(b->a, b->nra+nra);
 153     for (i = 0; (i < nra); i++)
 154     {
 155         b->a[b->nra++] = a[i];
 156     }
 157     b->nr++;
 158     b->index[b->nr] = b->nra;
 159 }
 160
 161 /* compare index in `a' with group in `b' at `index',
 162    when `index'<0 it is relative to end of `b' */
 163 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 164 {
 165     int i;
 166
 167     if (index < 0)
 168     {
 169         index = b->nr-1+index;
 170     }
 171     if (index >= b->nr)
 172     {
 173         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 174     }
 175     /* compare sizes */
 176     if (nra != b->index[index+1] - b->index[index])
 177     {
 178         return FALSE;
 179     }
 180     for (i = 0; i < nra; i++)
 181     {
 182         if (a[i] != b->a[b->index[index]+i])
 183         {
 184             return FALSE;
 185         }
 186     }
 187     return TRUE;
 188 }
 189
 190 static void
 191 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 192 {
 193     int   i, j;
 194     int   found;
 195
 196     int * counter;
 197
 198     snew(counter, ntypes);
 199     for (i = 0; i < ntypes; i++)
 200     {
 201         counter[i] = 0;
 202     }
 203     for (i = 0; i < nres; i++)
 204     {
 205         found = 0;
 206         for (j = 0; j < ntypes; j++)
 207         {
 208             if (!gmx_strcasecmp(restype[i], typenames[j]))
 209             {
 210                 counter[j]++;
 211             }
 212         }
 213     }
 214
 215     for (i = 0; (i < ntypes); i++)
 216     {
 217         if (counter[i] > 0)
 218         {
 219             printf("There are: %5d %10s residues\n", counter[i], typenames[i]);
 220         }
 221     }
 222
 223     sfree(counter);
 224 }
 225
 226
 227 atom_id *
 228 mk_aid(t_atoms *atoms, const char ** restype, const char * typestring, int *nra, gmx_bool bMatch)
 229 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 230 {
 231     atom_id *a;
 232     int      i;
 233     int      res;
 234
 235     snew(a, atoms->nr);
 236     *nra = 0;
 237     for (i = 0; (i < atoms->nr); i++)
 238     {
 239         res = !gmx_strcasecmp(restype[atoms->atom[i].resind], typestring);
 240         if (bMatch == FALSE)
 241         {
 242             res = !res;
 243         }
 244         if (res)
 245         {
 246             a[(*nra)++] = i;
 247         }
 248     }
 249
 250     return a;
 251 }
 252
 253 typedef struct {
 254     char    *rname;
 255     gmx_bool bNeg;
 256     char    *gname;
 257 } restp_t;
 258
 259 static void analyse_other(const char ** restype, t_atoms *atoms,
 260                           t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 261 {
 262     restp_t *restp = NULL;
 263     char   **attp  = NULL;
 264     char    *rname, *aname;
 265     atom_id *other_ndx, *aid, *aaid;
 266     int      i, j, k, l, resind, naid, naaid, natp, nrestp = 0;
 267
 268     for (i = 0; (i < atoms->nres); i++)
 269     {
 270         if (gmx_strcasecmp(restype[i], "Protein") && gmx_strcasecmp(restype[i], "DNA") && gmx_strcasecmp(restype[i], "RNA") && gmx_strcasecmp(restype[i], "Water"))
 271         {
 272             break;
 273         }
 274     }
 275     if (i < atoms->nres)
 276     {
 277         /* we have others */
 278         if (bVerb)
 279         {
 280             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 281         }
 282         snew(other_ndx, atoms->nr);
 283         for (k = 0; (k < atoms->nr); k++)
 284         {
 285             resind = atoms->atom[k].resind;
 286             rname  = *atoms->resinfo[resind].name;
 287             if (gmx_strcasecmp(restype[resind], "Protein") && gmx_strcasecmp(restype[resind], "DNA") &&
 288                 gmx_strcasecmp(restype[resind], "RNA") && gmx_strcasecmp(restype[resind], "Water"))
 289             {
 290
 291                 for (l = 0; (l < nrestp); l++)
 292                 {
 293                     if (strcmp(restp[l].rname, rname) == 0)
 294                     {
 295                         break;
 296                     }
 297                 }
 298                 if (l == nrestp)
 299                 {
 300                     srenew(restp, nrestp+1);
 301                     restp[nrestp].rname = strdup(rname);
 302                     restp[nrestp].bNeg  = FALSE;
 303                     restp[nrestp].gname = strdup(rname);
 304                     nrestp++;
 305
 306                 }
 307             }
 308         }
 309         for (i = 0; (i < nrestp); i++)
 310         {
 311             snew(aid, atoms->nr);
 312             naid = 0;
 313             for (j = 0; (j < atoms->nr); j++)
 314             {
 315                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 316                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
 317                     (strcmp(restp[i].rname, rname) != 0 &&  restp[i].bNeg))
 318                 {
 319                     aid[naid++] = j;
 320                 }
 321             }
 322             add_grp(gb, gn, naid, aid, restp[i].gname);
 323             if (bASK)
 324             {
 325                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 326                 fflush(stdout);
 327                 if (gmx_ask_yesno(bASK))
 328                 {
 329                     natp = 0;
 330                     for (k = 0; (k < naid); k++)
 331                     {
 332                         aname = *atoms->atomname[aid[k]];
 333                         for (l = 0; (l < natp); l++)
 334                         {
 335                             if (strcmp(aname, attp[l]) == 0)
 336                             {
 337                                 break;
 338                             }
 339                         }
 340                         if (l == natp)
 341                         {
 342                             srenew(attp, ++natp);
 343                             attp[natp-1] = aname;
 344                         }
 345                     }
 346                     if (natp > 1)
 347                     {
 348                         for (l = 0; (l < natp); l++)
 349                         {
 350                             snew(aaid, naid);
 351                             naaid = 0;
 352                             for (k = 0; (k < naid); k++)
 353                             {
 354                                 aname = *atoms->atomname[aid[k]];
 355                                 if (strcmp(aname, attp[l]) == 0)
 356                                 {
 357                                     aaid[naaid++] = aid[k];
 358                                 }
 359                             }
 360                             add_grp(gb, gn, naaid, aaid, attp[l]);
 361                             sfree(aaid);
 362                         }
 363                     }
 364                     sfree(attp);
 365                     attp = NULL;
 366                 }
 367                 sfree(aid);
 368             }
 369         }
 370         sfree(other_ndx);
 371     }
 372 }
 373
 374 /*! \brief
 375  * Cata necessary to construct a single (protein) index group in
 376  * analyse_prot().
 377  */
 378 typedef struct gmx_help_make_index_group
 379 {
 380     /** The set of atom names that will be used to form this index group */
 381     const char **defining_atomnames;
 382     /** Size of the defining_atomnames array */
 383     const int    num_defining_atomnames;
 384     /** Name of this index group */
 385     const char  *group_name;
 386     /** Whether the above atom names name the atoms in the group, or
 387         those not in the group */
 388     gmx_bool bTakeComplement;
 389     /** The index in wholename gives the first item in the arrays of
 390        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 391        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 392        This is comparable to using a '*' wildcard at the end of specific
 393        atom names, but that is more involved to implement...
 394      */
 395     int wholename;
 396     /** Only create this index group if it differs from the one specified in compareto,
 397        where -1 means to always create this group. */
 398     int compareto;
 399 } t_gmx_help_make_index_group;
 400
 401 static void analyse_prot(const char ** restype, t_atoms *atoms,
 402                          t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 403 {
 404     /* lists of atomnames to be used in constructing index groups: */
 405     static const char *pnoh[]    = { "H", "HN" };
 406     static const char *pnodum[]  = {
 407         "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 408         "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
 409     };
 410     static const char *calpha[]  = { "CA" };
 411     static const char *bb[]      = { "N", "CA", "C" };
 412     static const char *mc[]      = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 413     static const char *mcb[]     = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 414     static const char *mch[]     = {
 415         "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
 416         "H1", "H2", "H3", "H", "HN"
 417     };
 418
 419     static const t_gmx_help_make_index_group constructing_data[] =
 420     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 421      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 422      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 423      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 424      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 425      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 426      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 427      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 428      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 429      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0}, };
 430     const int   num_index_groups = asize(constructing_data);
 431
 432     int         n, j;
 433     atom_id    *aid;
 434     int         nra, nnpres, npres;
 435     gmx_bool    match;
 436     char        ndx_name[STRLEN], *atnm;
 437     int         i;
 438
 439     if (bVerb)
 440     {
 441         printf("Analysing Protein...\n");
 442     }
 443     snew(aid, atoms->nr);
 444
 445     /* calculate the number of protein residues */
 446     npres = 0;
 447     for (i = 0; (i < atoms->nres); i++)
 448     {
 449         if (0 == gmx_strcasecmp(restype[i], "Protein"))
 450         {
 451             npres++;
 452         }
 453     }
 454     /* find matching or complement atoms */
 455     for (i = 0; (i < (int)num_index_groups); i++)
 456     {
 457         nra = 0;
 458         for (n = 0; (n < atoms->nr); n++)
 459         {
 460             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind], "Protein"))
 461             {
 462                 match = FALSE;
 463                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 464                 {
 465                     /* skip digits at beginning of atomname, e.g. 1H */
 466                     atnm = *atoms->atomname[n];
 467                     while (isdigit(atnm[0]))
 468                     {
 469                         atnm++;
 470                     }
 471                     if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
 472                     {
 473                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 474                         {
 475                             match = TRUE;
 476                         }
 477                     }
 478                     else
 479                     {
 480                         if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
 481                         {
 482                             match = TRUE;
 483                         }
 484                     }
 485                 }
 486                 if (constructing_data[i].bTakeComplement != match)
 487                 {
 488                     aid[nra++] = n;
 489                 }
 490             }
 491         }
 492         /* if we want to add this group always or it differs from previous
 493            group, add it: */
 494         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, nra, aid, constructing_data[i].compareto-i) )
 495         {
 496             add_grp(gb, gn, nra, aid, constructing_data[i].group_name);
 497         }
 498     }
 499
 500     if (bASK)
 501     {
 502         for (i = 0; (i < (int)num_index_groups); i++)
 503         {
 504             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 505             if (gmx_ask_yesno(bASK))
 506             {
 507                 int resind;
 508                 nra = 0;
 509                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 510                 {
 511                     resind = atoms->atom[n].resind;
 512                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 513                     {
 514                         match = FALSE;
 515                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 516                         {
 517                             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
 518                             {
 519                                 match = TRUE;
 520                             }
 521                         }
 522                         if (constructing_data[i].bTakeComplement != match)
 523                         {
 524                             aid[nra++] = n;
 525                         }
 526                     }
 527                     /* copy the residuename to the tail of the groupname */
 528                     if (nra > 0)
 529                     {
 530                         t_resinfo *ri;
 531                         ri = &atoms->resinfo[resind];
 532                         sprintf(ndx_name, "%s_%s%d%c",
 533                                 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 534                         add_grp(gb, gn, nra, aid, ndx_name);
 535                         nra = 0;
 536                     }
 537                 }
 538             }
 539         }
 540         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 541         if (gmx_ask_yesno(bASK))
 542         {
 543             /* Make swap sidechain C=O index */
 544             int resind, hold;
 545             nra = 0;
 546             for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 547             {
 548                 resind = atoms->atom[n].resind;
 549                 hold   = -1;
 550                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 551                 {
 552                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 553                     {
 554                         aid[nra++] = n;
 555                         hold       = nra;
 556                         nra       += 2;
 557                     }
 558                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 559                     {
 560                         if (hold == -1)
 561                         {
 562                             gmx_incons("Atom naming problem");
 563                         }
 564                         aid[hold] = n;
 565                     }
 566                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 567                     {
 568                         if (hold == -1)
 569                         {
 570                             gmx_incons("Atom naming problem");
 571                         }
 572                         aid[hold+1] = n;
 573                     }
 574                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 575                     {
 576                         if (hold == -1)
 577                         {
 578                             gmx_incons("Atom naming problem");
 579                         }
 580                         aid[hold+1] = n;
 581                     }
 582                     else
 583                     {
 584                         aid[nra++] = n;
 585                     }
 586                 }
 587             }
 588             /* copy the residuename to the tail of the groupname */
 589             if (nra > 0)
 590             {
 591                 add_grp(gb, gn, nra, aid, "SwapSC-CO");
 592                 nra = 0;
 593             }
 594         }
 595     }
 596     sfree(aid);
 597 }
 598
 599
 600
 601
 602 /* Return 0 if the name was found, otherwise -1.
 603  * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 604  */
 605 int
 606 gmx_residuetype_get_type(gmx_residuetype_t rt, const char * resname, const char ** p_restype)
 607 {
 608     int    i, rc;
 609
 610     rc = -1;
 611     for (i = 0; i < rt->n && rc; i++)
 612     {
 613         rc = gmx_strcasecmp(rt->resname[i], resname);
 614     }
 615
 616     *p_restype = (rc == 0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 617
 618     return rc;
 619 }
 620
 621 int
 622 gmx_residuetype_add(gmx_residuetype_t rt, const char *newresname, const char *newrestype)
 623 {
 624     int           i;
 625     int           found;
 626     const char *  p_oldtype;
 627
 628     found = !gmx_residuetype_get_type(rt, newresname, &p_oldtype);
 629
 630     if (found && gmx_strcasecmp(p_oldtype, newrestype))
 631     {
 632         fprintf(stderr, "Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 633                 newresname, p_oldtype, newrestype);
 634     }
 635
 636     if (found == 0)
 637     {
 638         srenew(rt->resname, rt->n+1);
 639         srenew(rt->restype, rt->n+1);
 640         rt->resname[rt->n] = strdup(newresname);
 641         rt->restype[rt->n] = strdup(newrestype);
 642         rt->n++;
 643     }
 644
 645     return 0;
 646 }
 647
 648
 649 int
 650 gmx_residuetype_init(gmx_residuetype_t *prt)
 651 {
 652     FILE                 *  db;
 653     char                    line[STRLEN];
 654     char                    resname[STRLEN], restype[STRLEN], dum[STRLEN];
 655     char                 *  p;
 656     int                     i;
 657     struct gmx_residuetype *rt;
 658
 659     snew(rt, 1);
 660     *prt = rt;
 661
 662     rt->n        = 0;
 663     rt->resname  = NULL;
 664     rt->restype  = NULL;
 665
 666     db = libopen("residuetypes.dat");
 667
 668     while (get_a_line(db, line, STRLEN))
 669     {
 670         strip_comment(line);
 671         trim(line);
 672         if (line[0] != '\0')
 673         {
 674             if (sscanf(line, "%s %s %s", resname, restype, dum) != 2)
 675             {
 676                 gmx_fatal(FARGS, "Incorrect number of columns (2 expected) for line in residuetypes.dat");
 677             }
 678             gmx_residuetype_add(rt, resname, restype);
 679         }
 680     }
 681
 682     fclose(db);
 683
 684     return 0;
 685 }
 686
 687
 688
 689 int
 690 gmx_residuetype_destroy(gmx_residuetype_t rt)
 691 {
 692     int i;
 693
 694     for (i = 0; i < rt->n; i++)
 695     {
 696         sfree(rt->resname[i]);
 697         sfree(rt->restype[i]);
 698     }
 699     sfree(rt->resname);
 700     sfree(rt->restype);
 701     sfree(rt);
 702
 703     return 0;
 704 }
 705
 706 int
 707 gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 708                              const char ***       p_typenames,
 709                              int *                ntypes)
 710 {
 711     int            i, j, n;
 712     int            found;
 713     const char **  my_typename;
 714     char       *   p;
 715
 716     n = 0;
 717
 718     my_typename = NULL;
 719     for (i = 0; i < rt->n; i++)
 720     {
 721         p     = rt->restype[i];
 722         found = 0;
 723         for (j = 0; j < n && !found; j++)
 724         {
 725             found = !gmx_strcasecmp(p, my_typename[j]);
 726         }
 727
 728         if (!found)
 729         {
 730             srenew(my_typename, n+1);
 731             my_typename[n] = p;
 732             n++;
 733         }
 734     }
 735     *ntypes      = n;
 736     *p_typenames = my_typename;
 737
 738     return 0;
 739 }
 740
 741
 742
 743 gmx_bool
 744 gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 745 {
 746     gmx_bool    rc;
 747     const char *p_type;
 748
 749     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 750         gmx_strcasecmp(p_type, "Protein") == 0)
 751     {
 752         rc = TRUE;
 753     }
 754     else
 755     {
 756         rc = FALSE;
 757     }
 758     return rc;
 759 }
 760
 761 gmx_bool
 762 gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 763 {
 764     gmx_bool    rc;
 765     const char *p_type;
 766
 767     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 768         gmx_strcasecmp(p_type, "DNA") == 0)
 769     {
 770         rc = TRUE;
 771     }
 772     else
 773     {
 774         rc = FALSE;
 775     }
 776     return rc;
 777 }
 778
 779 gmx_bool
 780 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 781 {
 782     gmx_bool    rc;
 783     const char *p_type;
 784
 785     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 786         gmx_strcasecmp(p_type, "RNA") == 0)
 787     {
 788         rc = TRUE;
 789     }
 790     else
 791     {
 792         rc = FALSE;
 793     }
 794     return rc;
 795 }
 796
 797 /* Return the size of the arrays */
 798 int
 799 gmx_residuetype_get_size(gmx_residuetype_t rt)
 800 {
 801     return rt->n;
 802 }
 803
 804 /* Search for a residuetype with name resnm within the
 805  * gmx_residuetype database. Return the index if found,
 806  * otherwise -1.
 807  */
 808 int
 809 gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 810 {
 811     int i, rc;
 812
 813     rc = -1;
 814     for (i = 0; i < rt->n && rc; i++)
 815     {
 816         rc = gmx_strcasecmp(rt->resname[i], resnm);
 817     }
 818
 819     return (0 == rc) ? i-1 : -1;
 820 }
 821
 822 /* Return the name of the residuetype with the given index, or
 823  * NULL if not found. */
 824 const char *
 825 gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 826 {
 827     if (index >= 0 && index < rt->n)
 828     {
 829         return rt->resname[index];
 830     }
 831     else
 832     {
 833         return NULL;
 834     }
 835 }
 836
 837
 838
 839 void analyse(t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 840 {
 841     gmx_residuetype_t rt = NULL;
 842     char             *resnm;
 843     atom_id          *aid;
 844     const char    **  restype;
 845     int               nra;
 846     int               i, k;
 847     size_t            j;
 848     int               ntypes;
 849     char           *  p;
 850     const char     ** p_typename;
 851     int               iwater, iion;
 852     int               nwater, nion;
 853     int               found;
 854
 855     if (bVerb)
 856     {
 857         printf("Analysing residue names:\n");
 858     }
 859     /* Create system group, every single atom */
 860     snew(aid, atoms->nr);
 861     for (i = 0; i < atoms->nr; i++)
 862     {
 863         aid[i] = i;
 864     }
 865     add_grp(gb, gn, atoms->nr, aid, "System");
 866     sfree(aid);
 867
 868     /* For every residue, get a pointer to the residue type name */
 869     gmx_residuetype_init(&rt);
 870     assert(rt);
 871
 872     snew(restype, atoms->nres);
 873     ntypes     = 0;
 874     p_typename = NULL;
 875     for (i = 0; i < atoms->nres; i++)
 876     {
 877         resnm = *atoms->resinfo[i].name;
 878         gmx_residuetype_get_type(rt, resnm, &(restype[i]));
 879
 880         /* Note that this does not lead to a N*N loop, but N*K, where
 881          * K is the number of residue _types_, which is small and independent of N.
 882          */
 883         found = 0;
 884         for (k = 0; k < ntypes && !found; k++)
 885         {
 886             found = !strcmp(restype[i], p_typename[k]);
 887         }
 888         if (!found)
 889         {
 890             srenew(p_typename, ntypes+1);
 891             p_typename[ntypes++] = strdup(restype[i]);
 892         }
 893     }
 894
 895     if (bVerb)
 896     {
 897         p_status(restype, atoms->nres, p_typename, ntypes);
 898     }
 899
 900     for (k = 0; k < ntypes; k++)
 901     {
 902         aid = mk_aid(atoms, restype, p_typename[k], &nra, TRUE);
 903
 904         /* Check for special types to do fancy stuff with */
 905
 906         if (!gmx_strcasecmp(p_typename[k], "Protein") && nra > 0)
 907         {
 908             sfree(aid);
 909             /* PROTEIN */
 910             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 911
 912             /* Create a Non-Protein group */
 913             aid = mk_aid(atoms, restype, "Protein", &nra, FALSE);
 914             if ((nra > 0) && (nra < atoms->nr))
 915             {
 916                 add_grp(gb, gn, nra, aid, "non-Protein");
 917             }
 918             sfree(aid);
 919         }
 920         else if (!gmx_strcasecmp(p_typename[k], "Water") && nra > 0)
 921         {
 922             add_grp(gb, gn, nra, aid, p_typename[k]);
 923             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 924             add_grp(gb, gn, nra, aid, "SOL");
 925
 926             sfree(aid);
 927
 928             /* Solvent, create a negated group too */
 929             aid = mk_aid(atoms, restype, "Water", &nra, FALSE);
 930             if ((nra > 0) && (nra < atoms->nr))
 931             {
 932                 add_grp(gb, gn, nra, aid, "non-Water");
 933             }
 934             sfree(aid);
 935         }
 936         else if (nra > 0)
 937         {
 938             /* Other groups */
 939             add_grp(gb, gn, nra, aid, p_typename[k]);
 940             sfree(aid);
 941             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 942         }
 943     }
 944
 945     sfree(p_typename);
 946     sfree(restype);
 947     gmx_residuetype_destroy(rt);
 948
 949     /* Create a merged water_and_ions group */
 950     iwater = -1;
 951     iion   = -1;
 952     nwater = 0;
 953     nion   = 0;
 954
 955     for (i = 0; i < gb->nr; i++)
 956     {
 957         if (!gmx_strcasecmp((*gn)[i], "Water"))
 958         {
 959             iwater = i;
 960             nwater = gb->index[i+1]-gb->index[i];
 961         }
 962         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 963         {
 964             iion = i;
 965             nion = gb->index[i+1]-gb->index[i];
 966         }
 967     }
 968
 969     if (nwater > 0 && nion > 0)
 970     {
 971         srenew(gb->index, gb->nr+2);
 972         srenew(*gn, gb->nr+1);
 973         (*gn)[gb->nr] = strdup("Water_and_ions");
 974         srenew(gb->a, gb->nra+nwater+nion);
 975         if (nwater > 0)
 976         {
 977             for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
 978             {
 979                 gb->a[gb->nra++] = gb->a[i];
 980             }
 981         }
 982         if (nion > 0)
 983         {
 984             for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
 985             {
 986                 gb->a[gb->nra++] = gb->a[i];
 987             }
 988         }
 989         gb->nr++;
 990         gb->index[gb->nr] = gb->nra;
 991     }
 992 }
 993
 994
 995 void check_index(char *gname, int n, atom_id index[], char *traj, int natoms)
 996 {
 997     int i;
 998
 999     for (i = 0; i < n; i++)
1000     {
1001         if (index[i] >= natoms)
1002         {
1003             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
1004                       gname ? gname : "Index", i+1, index[i]+1,
1005                       traj ? traj : "the trajectory", natoms);
1006         }
1007         else if (index[i] < 0)
1008         {
1009             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
1010                       gname ? gname : "Index", i+1, index[i]+1);
1011         }
1012     }
1013 }
1014
1015 t_blocka *init_index(const char *gfile, char ***grpname)
1016 {
1017     FILE      *in;
1018     t_blocka  *b;
1019     int        a, maxentries;
1020     int        i, j, ng, nread;
1021     char       line[STRLEN], *pt, str[STRLEN];
1022
1023     in = gmx_fio_fopen(gfile, "r");
1024     snew(b, 1);
1025     get_a_line(in, line, STRLEN);
1026     if (line[0] == '[')
1027     {
1028         /* new format */
1029         b->nr      = 0;
1030         b->index   = NULL;
1031         b->nra     = 0;
1032         b->a       = NULL;
1033         *grpname   = NULL;
1034         maxentries = 0;
1035         do
1036         {
1037             if (get_header(line, str))
1038             {
1039                 b->nr++;
1040                 srenew(b->index, b->nr+1);
1041                 srenew(*grpname, b->nr);
1042                 if (b->nr == 1)
1043                 {
1044                     b->index[0] = 0;
1045                 }
1046                 b->index[b->nr]     = b->index[b->nr-1];
1047                 (*grpname)[b->nr-1] = strdup(str);
1048             }
1049             else
1050             {
1051                 if (b->nr == 0)
1052                 {
1053                     gmx_fatal(FARGS, "The first header of your indexfile is invalid");
1054                 }
1055                 pt = line;
1056                 while (sscanf(pt, "%s", str) == 1)
1057                 {
1058                     i = b->index[b->nr];
1059                     if (i >= maxentries)
1060                     {
1061                         maxentries += 1024;
1062                         srenew(b->a, maxentries);
1063                     }
1064                     b->a[i] = strtol(str, NULL, 10)-1;
1065                     b->index[b->nr]++;
1066                     (b->nra)++;
1067                     pt = strstr(pt, str)+strlen(str);
1068                 }
1069             }
1070         }
1071         while (get_a_line(in, line, STRLEN));
1072     }
1073     else
1074     {
1075         /* old format */
1076         sscanf(line, "%d%d", &b->nr, &b->nra);
1077         snew(b->index, b->nr+1);
1078         snew(*grpname, b->nr);
1079         b->index[0] = 0;
1080         snew(b->a, b->nra);
1081         for (i = 0; (i < b->nr); i++)
1082         {
1083             nread         = fscanf(in, "%s%d", str, &ng);
1084             (*grpname)[i] = strdup(str);
1085             b->index[i+1] = b->index[i]+ng;
1086             if (b->index[i+1] > b->nra)
1087             {
1088                 gmx_fatal(FARGS, "Something wrong in your indexfile at group %s", str);
1089             }
1090             for (j = 0; (j < ng); j++)
1091             {
1092                 nread               = fscanf(in, "%d", &a);
1093                 b->a[b->index[i]+j] = a;
1094             }
1095         }
1096     }
1097     gmx_fio_fclose(in);
1098
1099     for (i = 0; (i < b->nr); i++)
1100     {
1101         for (j = b->index[i]; (j < b->index[i+1]); j++)
1102         {
1103             if (b->a[j] < 0)
1104             {
1105                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
1106                         b->a[j], (*grpname)[i]);
1107             }
1108         }
1109     }
1110
1111     return b;
1112 }
1113
1114 static void minstring(char *str)
1115 {
1116     int i;
1117
1118     for (i = 0; (i < (int)strlen(str)); i++)
1119     {
1120         if (str[i] == '-')
1121         {
1122             str[i] = '_';
1123         }
1124     }
1125 }
1126
1127 int find_group(char s[], int ngrps, char **grpname)
1128 {
1129     int      aa, i, n;
1130     char     string[STRLEN];
1131     gmx_bool bMultiple;
1132
1133     bMultiple = FALSE;
1134     n         = strlen(s);
1135     aa        = NOTSET;
1136     /* first look for whole name match */
1137     if (aa == NOTSET)
1138     {
1139         for (i = 0; i < ngrps; i++)
1140         {
1141             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
1142             {
1143                 if (aa != NOTSET)
1144                 {
1145                     bMultiple = TRUE;
1146                 }
1147                 aa = i;
1148             }
1149         }
1150     }
1151     /* second look for first string match */
1152     if (aa == NOTSET)
1153     {
1154         for (i = 0; i < ngrps; i++)
1155         {
1156             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
1157             {
1158                 if (aa != NOTSET)
1159                 {
1160                     bMultiple = TRUE;
1161                 }
1162                 aa = i;
1163             }
1164         }
1165     }
1166     /* last look for arbitrary substring match */
1167     if (aa == NOTSET)
1168     {
1169         upstring(s);
1170         minstring(s);
1171         for (i = 0; i < ngrps; i++)
1172         {
1173             strcpy(string, grpname[i]);
1174             upstring(string);
1175             minstring(string);
1176             if (strstr(string, s) != NULL)
1177             {
1178                 if (aa != NOTSET)
1179                 {
1180                     bMultiple = TRUE;
1181                 }
1182                 aa = i;
1183             }
1184         }
1185     }
1186     if (bMultiple)
1187     {
1188         printf("Error: Multiple groups '%s' selected\n", s);
1189         aa = NOTSET;
1190     }
1191     return aa;
1192 }
1193
1194 static int qgroup(int *a, int ngrps, char **grpname)
1195 {
1196     char     s[STRLEN];
1197     int      aa;
1198     gmx_bool bInRange;
1199     char    *end;
1200
1201     do
1202     {
1203         fprintf(stderr, "Select a group: ");
1204         do
1205         {
1206             if (scanf("%s", s) != 1)
1207             {
1208                 gmx_fatal(FARGS, "Cannot read from input");
1209             }
1210             trim(s); /* remove spaces */
1211         }
1212         while (strlen(s) == 0);
1213         aa = strtol(s, &end, 10);
1214         if (aa == 0 && end[0] != '\0') /* string entered */
1215         {
1216             aa = find_group(s, ngrps, grpname);
1217         }
1218         bInRange = (aa >= 0 && aa < ngrps);
1219         if (!bInRange)
1220         {
1221             printf("Error: No such group '%s'\n", s);
1222         }
1223     }
1224     while (!bInRange);
1225     printf("Selected %d: '%s'\n", aa, grpname[aa]);
1226     *a = aa;
1227     return aa;
1228 }
1229
1230 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
1231                       int ngrps, int isize[], atom_id *index[], int grpnr[])
1232 {
1233     int i, j, gnr1;
1234
1235     if (grps->nr == 0)
1236     {
1237         gmx_fatal(FARGS, "Error: no groups in indexfile");
1238     }
1239     for (i = 0; (i < grps->nr); i++)
1240     {
1241         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
1242                 grps->index[i+1]-grps->index[i]);
1243     }
1244     for (i = 0; (i < ngrps); i++)
1245     {
1246         if (grps->nr > 1)
1247         {
1248             do
1249             {
1250                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
1251                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
1252                 {
1253                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
1254                 }
1255             }
1256             while ((gnr1 < 0) || (gnr1 >= grps->nr));
1257         }
1258         else
1259         {
1260             fprintf(stderr, "There is one group in the index\n");
1261             gnr1 = 0;
1262         }
1263         gnames[i] = strdup(grpname[gnr1]);
1264         isize[i]  = grps->index[gnr1+1]-grps->index[gnr1];
1265         snew(index[i], isize[i]);
1266         for (j = 0; (j < isize[i]); j++)
1267         {
1268             index[i][j] = grps->a[grps->index[gnr1]+j];
1269         }
1270     }
1271 }
1272
1273 void rd_index(const char *statfile, int ngrps, int isize[],
1274               atom_id *index[], char *grpnames[])
1275 {
1276     char    **gnames;
1277     t_blocka *grps;
1278     int      *grpnr;
1279
1280     snew(grpnr, ngrps);
1281     if (!statfile)
1282     {
1283         gmx_fatal(FARGS, "No index file specified");
1284     }
1285     grps = init_index(statfile, &gnames);
1286     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1287 }
1288
1289 void rd_index_nrs(char *statfile, int ngrps, int isize[],
1290                   atom_id *index[], char *grpnames[], int grpnr[])
1291 {
1292     char    **gnames;
1293     t_blocka *grps;
1294
1295     if (!statfile)
1296     {
1297         gmx_fatal(FARGS, "No index file specified");
1298     }
1299     grps = init_index(statfile, &gnames);
1300
1301     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1302 }
1303
1304 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1305                int isize[], atom_id *index[], char *grpnames[])
1306 {
1307     char    ***gnames;
1308     t_blocka  *grps = NULL;
1309     int       *grpnr;
1310
1311     snew(grpnr, ngrps);
1312     snew(gnames, 1);
1313     if (fnm != NULL)
1314     {
1315         grps = init_index(fnm, gnames);
1316     }
1317     else if (atoms)
1318     {
1319         snew(grps, 1);
1320         snew(grps->index, 1);
1321         analyse(atoms, grps, gnames, FALSE, FALSE);
1322     }
1323     else
1324     {
1325         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1326     }
1327
1328     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1329 }
1330
1331 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1332 {
1333     t_cluster_ndx *c;
1334     int            i;
1335
1336     snew(c, 1);
1337     c->clust     = init_index(ndx, &c->grpname);
1338     c->maxframe  = -1;
1339     for (i = 0; (i < c->clust->nra); i++)
1340     {
1341         c->maxframe = max(c->maxframe, c->clust->a[i]);
1342     }
1343     fprintf(fplog ? fplog : stdout,
1344             "There are %d clusters containing %d structures, highest framenr is %d\n",
1345             c->clust->nr, c->clust->nra, c->maxframe);
1346     if (debug)
1347     {
1348         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1349         for (i = 0; (i < c->clust->nra); i++)
1350         {
1351             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1352             {
1353                 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1354                           "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1355             }
1356         }
1357     }
1358     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1359
1360     return c;
1361 }