src/gromacs/gmxlib/index.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <ctype.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "sysstuff.h"
  45 #include "macros.h"
  46 #include "names.h"
  47 #include "string2.h"
  48 #include "main.h"
  49 #include "typedefs.h"
  50 #include "smalloc.h"
  51 #include "invblock.h"
  52 #include "macros.h"
  53 #include "index.h"
  54 #include "txtdump.h"
  55
  56 #include "gromacs/fileio/futil.h"
  57 #include "gromacs/fileio/gmxfio.h"
  58 #include "gromacs/fileio/strdb.h"
  59
  60 const char gmx_residuetype_undefined[] = "Other";
  61
  62 struct gmx_residuetype
  63 {
  64     int      n;
  65     char **  resname;
  66     char **  restype;
  67
  68 };
  69
  70
  71 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  72 {
  73     char c;
  74
  75     if (bASK)
  76     {
  77         do
  78         {
  79             c = toupper(fgetc(stdin));
  80         }
  81         while ((c != 'Y') && (c != 'N'));
  82
  83         return (c == 'Y');
  84     }
  85     else
  86     {
  87         return FALSE;
  88     }
  89 }
  90
  91 t_blocka *new_blocka(void)
  92 {
  93     t_blocka *block;
  94
  95     snew(block, 1);
  96     snew(block->index, 1);
  97
  98     return block;
  99 }
 100
 101 void write_index(const char *outf, t_blocka *b, char **gnames)
 102 {
 103     FILE *out;
 104     int   i, j, k;
 105
 106     out = gmx_fio_fopen(outf, "w");
 107     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 108     for (i = 0; (i < b->nr); i++)
 109     {
 110         fprintf(out, "[ %s ]\n", gnames[i]);
 111         for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 112         {
 113             fprintf(out, "%4d ", b->a[j]+1);
 114             if ((k % 15) == 14)
 115             {
 116                 fprintf(out, "\n");
 117             }
 118         }
 119         fprintf(out, "\n");
 120     }
 121     gmx_fio_fclose(out);
 122 }
 123
 124 void add_grp(t_blocka *b, char ***gnames, int nra, atom_id a[], const char *name)
 125 {
 126     int i;
 127
 128     srenew(b->index, b->nr+2);
 129     srenew(*gnames, b->nr+1);
 130     (*gnames)[b->nr] = strdup(name);
 131
 132     srenew(b->a, b->nra+nra);
 133     for (i = 0; (i < nra); i++)
 134     {
 135         b->a[b->nra++] = a[i];
 136     }
 137     b->nr++;
 138     b->index[b->nr] = b->nra;
 139 }
 140
 141 /* compare index in `a' with group in `b' at `index',
 142    when `index'<0 it is relative to end of `b' */
 143 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 144 {
 145     int i;
 146
 147     if (index < 0)
 148     {
 149         index = b->nr-1+index;
 150     }
 151     if (index >= b->nr)
 152     {
 153         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 154     }
 155     /* compare sizes */
 156     if (nra != b->index[index+1] - b->index[index])
 157     {
 158         return FALSE;
 159     }
 160     for (i = 0; i < nra; i++)
 161     {
 162         if (a[i] != b->a[b->index[index]+i])
 163         {
 164             return FALSE;
 165         }
 166     }
 167     return TRUE;
 168 }
 169
 170 static void
 171 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 172 {
 173     int   i, j;
 174     int   found;
 175
 176     int * counter;
 177
 178     snew(counter, ntypes);
 179     for (i = 0; i < ntypes; i++)
 180     {
 181         counter[i] = 0;
 182     }
 183     for (i = 0; i < nres; i++)
 184     {
 185         found = 0;
 186         for (j = 0; j < ntypes; j++)
 187         {
 188             if (!gmx_strcasecmp(restype[i], typenames[j]))
 189             {
 190                 counter[j]++;
 191             }
 192         }
 193     }
 194
 195     for (i = 0; (i < ntypes); i++)
 196     {
 197         if (counter[i] > 0)
 198         {
 199             printf("There are: %5d %10s residues\n", counter[i], typenames[i]);
 200         }
 201     }
 202
 203     sfree(counter);
 204 }
 205
 206
 207 atom_id *
 208 mk_aid(t_atoms *atoms, const char ** restype, const char * typestring, int *nra, gmx_bool bMatch)
 209 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 210 {
 211     atom_id *a;
 212     int      i;
 213     int      res;
 214
 215     snew(a, atoms->nr);
 216     *nra = 0;
 217     for (i = 0; (i < atoms->nr); i++)
 218     {
 219         res = !gmx_strcasecmp(restype[atoms->atom[i].resind], typestring);
 220         if (bMatch == FALSE)
 221         {
 222             res = !res;
 223         }
 224         if (res)
 225         {
 226             a[(*nra)++] = i;
 227         }
 228     }
 229
 230     return a;
 231 }
 232
 233 typedef struct {
 234     char    *rname;
 235     gmx_bool bNeg;
 236     char    *gname;
 237 } restp_t;
 238
 239 static void analyse_other(const char ** restype, t_atoms *atoms,
 240                           t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 241 {
 242     restp_t *restp = NULL;
 243     char   **attp  = NULL;
 244     char    *rname, *aname;
 245     atom_id *other_ndx, *aid, *aaid;
 246     int      i, j, k, l, resind, naid, naaid, natp, nrestp = 0;
 247
 248     for (i = 0; (i < atoms->nres); i++)
 249     {
 250         if (gmx_strcasecmp(restype[i], "Protein") && gmx_strcasecmp(restype[i], "DNA") && gmx_strcasecmp(restype[i], "RNA") && gmx_strcasecmp(restype[i], "Water"))
 251         {
 252             break;
 253         }
 254     }
 255     if (i < atoms->nres)
 256     {
 257         /* we have others */
 258         if (bVerb)
 259         {
 260             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 261         }
 262         snew(other_ndx, atoms->nr);
 263         for (k = 0; (k < atoms->nr); k++)
 264         {
 265             resind = atoms->atom[k].resind;
 266             rname  = *atoms->resinfo[resind].name;
 267             if (gmx_strcasecmp(restype[resind], "Protein") && gmx_strcasecmp(restype[resind], "DNA") &&
 268                 gmx_strcasecmp(restype[resind], "RNA") && gmx_strcasecmp(restype[resind], "Water"))
 269             {
 270
 271                 for (l = 0; (l < nrestp); l++)
 272                 {
 273                     if (strcmp(restp[l].rname, rname) == 0)
 274                     {
 275                         break;
 276                     }
 277                 }
 278                 if (l == nrestp)
 279                 {
 280                     srenew(restp, nrestp+1);
 281                     restp[nrestp].rname = strdup(rname);
 282                     restp[nrestp].bNeg  = FALSE;
 283                     restp[nrestp].gname = strdup(rname);
 284                     nrestp++;
 285
 286                 }
 287             }
 288         }
 289         for (i = 0; (i < nrestp); i++)
 290         {
 291             snew(aid, atoms->nr);
 292             naid = 0;
 293             for (j = 0; (j < atoms->nr); j++)
 294             {
 295                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 296                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
 297                     (strcmp(restp[i].rname, rname) != 0 &&  restp[i].bNeg))
 298                 {
 299                     aid[naid++] = j;
 300                 }
 301             }
 302             add_grp(gb, gn, naid, aid, restp[i].gname);
 303             if (bASK)
 304             {
 305                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 306                 fflush(stdout);
 307                 if (gmx_ask_yesno(bASK))
 308                 {
 309                     natp = 0;
 310                     for (k = 0; (k < naid); k++)
 311                     {
 312                         aname = *atoms->atomname[aid[k]];
 313                         for (l = 0; (l < natp); l++)
 314                         {
 315                             if (strcmp(aname, attp[l]) == 0)
 316                             {
 317                                 break;
 318                             }
 319                         }
 320                         if (l == natp)
 321                         {
 322                             srenew(attp, ++natp);
 323                             attp[natp-1] = aname;
 324                         }
 325                     }
 326                     if (natp > 1)
 327                     {
 328                         for (l = 0; (l < natp); l++)
 329                         {
 330                             snew(aaid, naid);
 331                             naaid = 0;
 332                             for (k = 0; (k < naid); k++)
 333                             {
 334                                 aname = *atoms->atomname[aid[k]];
 335                                 if (strcmp(aname, attp[l]) == 0)
 336                                 {
 337                                     aaid[naaid++] = aid[k];
 338                                 }
 339                             }
 340                             add_grp(gb, gn, naaid, aaid, attp[l]);
 341                             sfree(aaid);
 342                         }
 343                     }
 344                     sfree(attp);
 345                     attp = NULL;
 346                 }
 347                 sfree(aid);
 348             }
 349         }
 350         sfree(other_ndx);
 351     }
 352 }
 353
 354 /*! /brief Instances of this struct contain the data necessary to
 355  *         construct a single (protein) index group in
 356  *         analyse_prot(). */
 357 typedef struct gmx_help_make_index_group
 358 {
 359     /** The set of atom names that will be used to form this index group */
 360     const char **defining_atomnames;
 361     /** Size of the defining_atomnames array */
 362     const int    num_defining_atomnames;
 363     /** Name of this index group */
 364     const char  *group_name;
 365     /** Whether the above atom names name the atoms in the group, or
 366         those not in the group */
 367     gmx_bool bTakeComplement;
 368     /** The index in wholename gives the first item in the arrays of
 369        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 370        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 371        This is comparable to using a '*' wildcard at the end of specific
 372        atom names, but that is more involved to implement...
 373      */
 374     int wholename;
 375     /** Only create this index group if it differs from the one specified in compareto,
 376        where -1 means to always create this group. */
 377     int compareto;
 378 } t_gmx_help_make_index_group;
 379
 380 static void analyse_prot(const char ** restype, t_atoms *atoms,
 381                          t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 382 {
 383     /* lists of atomnames to be used in constructing index groups: */
 384     static const char *pnoh[]    = { "H", "HN" };
 385     static const char *pnodum[]  = {
 386         "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 387         "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
 388     };
 389     static const char *calpha[]  = { "CA" };
 390     static const char *bb[]      = { "N", "CA", "C" };
 391     static const char *mc[]      = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 392     static const char *mcb[]     = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 393     static const char *mch[]     = {
 394         "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
 395         "H1", "H2", "H3", "H", "HN"
 396     };
 397
 398     static const t_gmx_help_make_index_group constructing_data[] =
 399     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 400      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 401      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 402      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 403      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 404      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 405      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 406      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 407      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 408      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0}, };
 409     const int   num_index_groups = asize(constructing_data);
 410
 411     int         n, j;
 412     atom_id    *aid;
 413     int         nra, nnpres, npres;
 414     gmx_bool    match;
 415     char        ndx_name[STRLEN], *atnm;
 416     int         i;
 417
 418     if (bVerb)
 419     {
 420         printf("Analysing Protein...\n");
 421     }
 422     snew(aid, atoms->nr);
 423
 424     /* calculate the number of protein residues */
 425     npres = 0;
 426     for (i = 0; (i < atoms->nres); i++)
 427     {
 428         if (0 == gmx_strcasecmp(restype[i], "Protein"))
 429         {
 430             npres++;
 431         }
 432     }
 433     /* find matching or complement atoms */
 434     for (i = 0; (i < (int)num_index_groups); i++)
 435     {
 436         nra = 0;
 437         for (n = 0; (n < atoms->nr); n++)
 438         {
 439             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind], "Protein"))
 440             {
 441                 match = FALSE;
 442                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 443                 {
 444                     /* skip digits at beginning of atomname, e.g. 1H */
 445                     atnm = *atoms->atomname[n];
 446                     while (isdigit(atnm[0]))
 447                     {
 448                         atnm++;
 449                     }
 450                     if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
 451                     {
 452                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 453                         {
 454                             match = TRUE;
 455                         }
 456                     }
 457                     else
 458                     {
 459                         if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
 460                         {
 461                             match = TRUE;
 462                         }
 463                     }
 464                 }
 465                 if (constructing_data[i].bTakeComplement != match)
 466                 {
 467                     aid[nra++] = n;
 468                 }
 469             }
 470         }
 471         /* if we want to add this group always or it differs from previous
 472            group, add it: */
 473         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, nra, aid, constructing_data[i].compareto-i) )
 474         {
 475             add_grp(gb, gn, nra, aid, constructing_data[i].group_name);
 476         }
 477     }
 478
 479     if (bASK)
 480     {
 481         for (i = 0; (i < (int)num_index_groups); i++)
 482         {
 483             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 484             if (gmx_ask_yesno(bASK))
 485             {
 486                 int resind;
 487                 nra = 0;
 488                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 489                 {
 490                     resind = atoms->atom[n].resind;
 491                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 492                     {
 493                         match = FALSE;
 494                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 495                         {
 496                             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
 497                             {
 498                                 match = TRUE;
 499                             }
 500                         }
 501                         if (constructing_data[i].bTakeComplement != match)
 502                         {
 503                             aid[nra++] = n;
 504                         }
 505                     }
 506                     /* copy the residuename to the tail of the groupname */
 507                     if (nra > 0)
 508                     {
 509                         t_resinfo *ri;
 510                         ri = &atoms->resinfo[resind];
 511                         sprintf(ndx_name, "%s_%s%d%c",
 512                                 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 513                         add_grp(gb, gn, nra, aid, ndx_name);
 514                         nra = 0;
 515                     }
 516                 }
 517             }
 518         }
 519         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 520         if (gmx_ask_yesno(bASK))
 521         {
 522             /* Make swap sidechain C=O index */
 523             int resind, hold;
 524             nra = 0;
 525             for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 526             {
 527                 resind = atoms->atom[n].resind;
 528                 hold   = -1;
 529                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 530                 {
 531                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 532                     {
 533                         aid[nra++] = n;
 534                         hold       = nra;
 535                         nra       += 2;
 536                     }
 537                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 538                     {
 539                         if (hold == -1)
 540                         {
 541                             gmx_incons("Atom naming problem");
 542                         }
 543                         aid[hold] = n;
 544                     }
 545                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 546                     {
 547                         if (hold == -1)
 548                         {
 549                             gmx_incons("Atom naming problem");
 550                         }
 551                         aid[hold+1] = n;
 552                     }
 553                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 554                     {
 555                         if (hold == -1)
 556                         {
 557                             gmx_incons("Atom naming problem");
 558                         }
 559                         aid[hold+1] = n;
 560                     }
 561                     else
 562                     {
 563                         aid[nra++] = n;
 564                     }
 565                 }
 566             }
 567             /* copy the residuename to the tail of the groupname */
 568             if (nra > 0)
 569             {
 570                 add_grp(gb, gn, nra, aid, "SwapSC-CO");
 571                 nra = 0;
 572             }
 573         }
 574     }
 575     sfree(aid);
 576 }
 577
 578
 579
 580
 581 /* Return 0 if the name was found, otherwise -1.
 582  * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 583  */
 584 int
 585 gmx_residuetype_get_type(gmx_residuetype_t rt, const char * resname, const char ** p_restype)
 586 {
 587     int    i, rc;
 588
 589     rc = -1;
 590     for (i = 0; i < rt->n && rc; i++)
 591     {
 592         rc = gmx_strcasecmp(rt->resname[i], resname);
 593     }
 594
 595     *p_restype = (rc == 0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 596
 597     return rc;
 598 }
 599
 600 int
 601 gmx_residuetype_add(gmx_residuetype_t rt, const char *newresname, const char *newrestype)
 602 {
 603     int           i;
 604     int           found;
 605     const char *  p_oldtype;
 606
 607     found = !gmx_residuetype_get_type(rt, newresname, &p_oldtype);
 608
 609     if (found && gmx_strcasecmp(p_oldtype, newrestype))
 610     {
 611         fprintf(stderr, "Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 612                 newresname, p_oldtype, newrestype);
 613     }
 614
 615     if (found == 0)
 616     {
 617         srenew(rt->resname, rt->n+1);
 618         srenew(rt->restype, rt->n+1);
 619         rt->resname[rt->n] = strdup(newresname);
 620         rt->restype[rt->n] = strdup(newrestype);
 621         rt->n++;
 622     }
 623
 624     return 0;
 625 }
 626
 627
 628 int
 629 gmx_residuetype_init(gmx_residuetype_t *prt)
 630 {
 631     FILE                 *  db;
 632     char                    line[STRLEN];
 633     char                    resname[STRLEN], restype[STRLEN], dum[STRLEN];
 634     char                 *  p;
 635     int                     i;
 636     struct gmx_residuetype *rt;
 637
 638     snew(rt, 1);
 639     *prt = rt;
 640
 641     rt->n        = 0;
 642     rt->resname  = NULL;
 643     rt->restype  = NULL;
 644
 645     db = libopen("residuetypes.dat");
 646
 647     while (get_a_line(db, line, STRLEN))
 648     {
 649         strip_comment(line);
 650         trim(line);
 651         if (line[0] != '\0')
 652         {
 653             if (sscanf(line, "%s %s %s", resname, restype, dum) != 2)
 654             {
 655                 gmx_fatal(FARGS, "Incorrect number of columns (2 expected) for line in residuetypes.dat");
 656             }
 657             gmx_residuetype_add(rt, resname, restype);
 658         }
 659     }
 660
 661     fclose(db);
 662
 663     return 0;
 664 }
 665
 666
 667
 668 int
 669 gmx_residuetype_destroy(gmx_residuetype_t rt)
 670 {
 671     int i;
 672
 673     for (i = 0; i < rt->n; i++)
 674     {
 675         sfree(rt->resname[i]);
 676         sfree(rt->restype[i]);
 677     }
 678     sfree(rt->resname);
 679     sfree(rt->restype);
 680     sfree(rt);
 681
 682     return 0;
 683 }
 684
 685 int
 686 gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 687                              const char ***       p_typenames,
 688                              int *                ntypes)
 689 {
 690     int            i, j, n;
 691     int            found;
 692     const char **  my_typename;
 693     char       *   p;
 694
 695     n = 0;
 696
 697     my_typename = NULL;
 698     for (i = 0; i < rt->n; i++)
 699     {
 700         p     = rt->restype[i];
 701         found = 0;
 702         for (j = 0; j < n && !found; j++)
 703         {
 704             found = !gmx_strcasecmp(p, my_typename[j]);
 705         }
 706
 707         if (!found)
 708         {
 709             srenew(my_typename, n+1);
 710             my_typename[n] = p;
 711             n++;
 712         }
 713     }
 714     *ntypes      = n;
 715     *p_typenames = my_typename;
 716
 717     return 0;
 718 }
 719
 720
 721
 722 gmx_bool
 723 gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 724 {
 725     gmx_bool    rc;
 726     const char *p_type;
 727
 728     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 729         gmx_strcasecmp(p_type, "Protein") == 0)
 730     {
 731         rc = TRUE;
 732     }
 733     else
 734     {
 735         rc = FALSE;
 736     }
 737     return rc;
 738 }
 739
 740 gmx_bool
 741 gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 742 {
 743     gmx_bool    rc;
 744     const char *p_type;
 745
 746     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 747         gmx_strcasecmp(p_type, "DNA") == 0)
 748     {
 749         rc = TRUE;
 750     }
 751     else
 752     {
 753         rc = FALSE;
 754     }
 755     return rc;
 756 }
 757
 758 gmx_bool
 759 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 760 {
 761     gmx_bool    rc;
 762     const char *p_type;
 763
 764     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 765         gmx_strcasecmp(p_type, "RNA") == 0)
 766     {
 767         rc = TRUE;
 768     }
 769     else
 770     {
 771         rc = FALSE;
 772     }
 773     return rc;
 774 }
 775
 776 /* Return the size of the arrays */
 777 int
 778 gmx_residuetype_get_size(gmx_residuetype_t rt)
 779 {
 780     return rt->n;
 781 }
 782
 783 /* Search for a residuetype with name resnm within the
 784  * gmx_residuetype database. Return the index if found,
 785  * otherwise -1.
 786  */
 787 int
 788 gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 789 {
 790     int i, rc;
 791
 792     rc = -1;
 793     for (i = 0; i < rt->n && rc; i++)
 794     {
 795         rc = gmx_strcasecmp(rt->resname[i], resnm);
 796     }
 797
 798     return (0 == rc) ? i-1 : -1;
 799 }
 800
 801 /* Return the name of the residuetype with the given index, or
 802  * NULL if not found. */
 803 const char *
 804 gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 805 {
 806     if (index >= 0 && index < rt->n)
 807     {
 808         return rt->resname[index];
 809     }
 810     else
 811     {
 812         return NULL;
 813     }
 814 }
 815
 816
 817
 818 void analyse(t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 819 {
 820     gmx_residuetype_t rt = NULL;
 821     char             *resnm;
 822     atom_id          *aid;
 823     const char    **  restype;
 824     int               nra;
 825     int               i, k;
 826     size_t            j;
 827     int               ntypes;
 828     char           *  p;
 829     const char     ** p_typename;
 830     int               iwater, iion;
 831     int               nwater, nion;
 832     int               found;
 833
 834     if (bVerb)
 835     {
 836         printf("Analysing residue names:\n");
 837     }
 838     /* Create system group, every single atom */
 839     snew(aid, atoms->nr);
 840     for (i = 0; i < atoms->nr; i++)
 841     {
 842         aid[i] = i;
 843     }
 844     add_grp(gb, gn, atoms->nr, aid, "System");
 845     sfree(aid);
 846
 847     /* For every residue, get a pointer to the residue type name */
 848     gmx_residuetype_init(&rt);
 849     assert(rt);
 850
 851     snew(restype, atoms->nres);
 852     ntypes     = 0;
 853     p_typename = NULL;
 854     for (i = 0; i < atoms->nres; i++)
 855     {
 856         resnm = *atoms->resinfo[i].name;
 857         gmx_residuetype_get_type(rt, resnm, &(restype[i]));
 858
 859         /* Note that this does not lead to a N*N loop, but N*K, where
 860          * K is the number of residue _types_, which is small and independent of N.
 861          */
 862         found = 0;
 863         for (k = 0; k < ntypes && !found; k++)
 864         {
 865             found = !strcmp(restype[i], p_typename[k]);
 866         }
 867         if (!found)
 868         {
 869             srenew(p_typename, ntypes+1);
 870             p_typename[ntypes++] = strdup(restype[i]);
 871         }
 872     }
 873
 874     if (bVerb)
 875     {
 876         p_status(restype, atoms->nres, p_typename, ntypes);
 877     }
 878
 879     for (k = 0; k < ntypes; k++)
 880     {
 881         aid = mk_aid(atoms, restype, p_typename[k], &nra, TRUE);
 882
 883         /* Check for special types to do fancy stuff with */
 884
 885         if (!gmx_strcasecmp(p_typename[k], "Protein") && nra > 0)
 886         {
 887             sfree(aid);
 888             /* PROTEIN */
 889             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 890
 891             /* Create a Non-Protein group */
 892             aid = mk_aid(atoms, restype, "Protein", &nra, FALSE);
 893             if ((nra > 0) && (nra < atoms->nr))
 894             {
 895                 add_grp(gb, gn, nra, aid, "non-Protein");
 896             }
 897             sfree(aid);
 898         }
 899         else if (!gmx_strcasecmp(p_typename[k], "Water") && nra > 0)
 900         {
 901             add_grp(gb, gn, nra, aid, p_typename[k]);
 902             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 903             add_grp(gb, gn, nra, aid, "SOL");
 904
 905             sfree(aid);
 906
 907             /* Solvent, create a negated group too */
 908             aid = mk_aid(atoms, restype, "Water", &nra, FALSE);
 909             if ((nra > 0) && (nra < atoms->nr))
 910             {
 911                 add_grp(gb, gn, nra, aid, "non-Water");
 912             }
 913             sfree(aid);
 914         }
 915         else if (nra > 0)
 916         {
 917             /* Other groups */
 918             add_grp(gb, gn, nra, aid, p_typename[k]);
 919             sfree(aid);
 920             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 921         }
 922     }
 923
 924     sfree(p_typename);
 925     sfree(restype);
 926     gmx_residuetype_destroy(rt);
 927
 928     /* Create a merged water_and_ions group */
 929     iwater = -1;
 930     iion   = -1;
 931     nwater = 0;
 932     nion   = 0;
 933
 934     for (i = 0; i < gb->nr; i++)
 935     {
 936         if (!gmx_strcasecmp((*gn)[i], "Water"))
 937         {
 938             iwater = i;
 939             nwater = gb->index[i+1]-gb->index[i];
 940         }
 941         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 942         {
 943             iion = i;
 944             nion = gb->index[i+1]-gb->index[i];
 945         }
 946     }
 947
 948     if (nwater > 0 && nion > 0)
 949     {
 950         srenew(gb->index, gb->nr+2);
 951         srenew(*gn, gb->nr+1);
 952         (*gn)[gb->nr] = strdup("Water_and_ions");
 953         srenew(gb->a, gb->nra+nwater+nion);
 954         if (nwater > 0)
 955         {
 956             for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
 957             {
 958                 gb->a[gb->nra++] = gb->a[i];
 959             }
 960         }
 961         if (nion > 0)
 962         {
 963             for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
 964             {
 965                 gb->a[gb->nra++] = gb->a[i];
 966             }
 967         }
 968         gb->nr++;
 969         gb->index[gb->nr] = gb->nra;
 970     }
 971 }
 972
 973
 974 void check_index(char *gname, int n, atom_id index[], char *traj, int natoms)
 975 {
 976     int i;
 977
 978     for (i = 0; i < n; i++)
 979     {
 980         if (index[i] >= natoms)
 981         {
 982             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 983                       gname ? gname : "Index", i+1, index[i]+1,
 984                       traj ? traj : "the trajectory", natoms);
 985         }
 986         else if (index[i] < 0)
 987         {
 988             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
 989                       gname ? gname : "Index", i+1, index[i]+1);
 990         }
 991     }
 992 }
 993
 994 t_blocka *init_index(const char *gfile, char ***grpname)
 995 {
 996     FILE      *in;
 997     t_blocka  *b;
 998     int        a, maxentries;
 999     int        i, j, ng, nread;
1000     char       line[STRLEN], *pt, str[STRLEN];
1001
1002     in = gmx_fio_fopen(gfile, "r");
1003     snew(b, 1);
1004     get_a_line(in, line, STRLEN);
1005     if (line[0] == '[')
1006     {
1007         /* new format */
1008         b->nr      = 0;
1009         b->index   = NULL;
1010         b->nra     = 0;
1011         b->a       = NULL;
1012         *grpname   = NULL;
1013         maxentries = 0;
1014         do
1015         {
1016             if (get_header(line, str))
1017             {
1018                 b->nr++;
1019                 srenew(b->index, b->nr+1);
1020                 srenew(*grpname, b->nr);
1021                 if (b->nr == 1)
1022                 {
1023                     b->index[0] = 0;
1024                 }
1025                 b->index[b->nr]     = b->index[b->nr-1];
1026                 (*grpname)[b->nr-1] = strdup(str);
1027             }
1028             else
1029             {
1030                 if (b->nr == 0)
1031                 {
1032                     gmx_fatal(FARGS, "The first header of your indexfile is invalid");
1033                 }
1034                 pt = line;
1035                 while (sscanf(pt, "%s", str) == 1)
1036                 {
1037                     i = b->index[b->nr];
1038                     if (i >= maxentries)
1039                     {
1040                         maxentries += 1024;
1041                         srenew(b->a, maxentries);
1042                     }
1043                     b->a[i] = strtol(str, NULL, 10)-1;
1044                     b->index[b->nr]++;
1045                     (b->nra)++;
1046                     pt = strstr(pt, str)+strlen(str);
1047                 }
1048             }
1049         }
1050         while (get_a_line(in, line, STRLEN));
1051     }
1052     else
1053     {
1054         /* old format */
1055         sscanf(line, "%d%d", &b->nr, &b->nra);
1056         snew(b->index, b->nr+1);
1057         snew(*grpname, b->nr);
1058         b->index[0] = 0;
1059         snew(b->a, b->nra);
1060         for (i = 0; (i < b->nr); i++)
1061         {
1062             nread         = fscanf(in, "%s%d", str, &ng);
1063             (*grpname)[i] = strdup(str);
1064             b->index[i+1] = b->index[i]+ng;
1065             if (b->index[i+1] > b->nra)
1066             {
1067                 gmx_fatal(FARGS, "Something wrong in your indexfile at group %s", str);
1068             }
1069             for (j = 0; (j < ng); j++)
1070             {
1071                 nread               = fscanf(in, "%d", &a);
1072                 b->a[b->index[i]+j] = a;
1073             }
1074         }
1075     }
1076     gmx_fio_fclose(in);
1077
1078     for (i = 0; (i < b->nr); i++)
1079     {
1080         for (j = b->index[i]; (j < b->index[i+1]); j++)
1081         {
1082             if (b->a[j] < 0)
1083             {
1084                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
1085                         b->a[j], (*grpname)[i]);
1086             }
1087         }
1088     }
1089
1090     return b;
1091 }
1092
1093 static void minstring(char *str)
1094 {
1095     int i;
1096
1097     for (i = 0; (i < (int)strlen(str)); i++)
1098     {
1099         if (str[i] == '-')
1100         {
1101             str[i] = '_';
1102         }
1103     }
1104 }
1105
1106 int find_group(char s[], int ngrps, char **grpname)
1107 {
1108     int      aa, i, n;
1109     char     string[STRLEN];
1110     gmx_bool bMultiple;
1111
1112     bMultiple = FALSE;
1113     n         = strlen(s);
1114     aa        = NOTSET;
1115     /* first look for whole name match */
1116     if (aa == NOTSET)
1117     {
1118         for (i = 0; i < ngrps; i++)
1119         {
1120             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
1121             {
1122                 if (aa != NOTSET)
1123                 {
1124                     bMultiple = TRUE;
1125                 }
1126                 aa = i;
1127             }
1128         }
1129     }
1130     /* second look for first string match */
1131     if (aa == NOTSET)
1132     {
1133         for (i = 0; i < ngrps; i++)
1134         {
1135             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
1136             {
1137                 if (aa != NOTSET)
1138                 {
1139                     bMultiple = TRUE;
1140                 }
1141                 aa = i;
1142             }
1143         }
1144     }
1145     /* last look for arbitrary substring match */
1146     if (aa == NOTSET)
1147     {
1148         upstring(s);
1149         minstring(s);
1150         for (i = 0; i < ngrps; i++)
1151         {
1152             strcpy(string, grpname[i]);
1153             upstring(string);
1154             minstring(string);
1155             if (strstr(string, s) != NULL)
1156             {
1157                 if (aa != NOTSET)
1158                 {
1159                     bMultiple = TRUE;
1160                 }
1161                 aa = i;
1162             }
1163         }
1164     }
1165     if (bMultiple)
1166     {
1167         printf("Error: Multiple groups '%s' selected\n", s);
1168         aa = NOTSET;
1169     }
1170     return aa;
1171 }
1172
1173 static int qgroup(int *a, int ngrps, char **grpname)
1174 {
1175     char     s[STRLEN];
1176     int      aa;
1177     gmx_bool bInRange;
1178     char    *end;
1179
1180     do
1181     {
1182         fprintf(stderr, "Select a group: ");
1183         do
1184         {
1185             if (scanf("%s", s) != 1)
1186             {
1187                 gmx_fatal(FARGS, "Cannot read from input");
1188             }
1189             trim(s); /* remove spaces */
1190         }
1191         while (strlen(s) == 0);
1192         aa = strtol(s, &end, 10);
1193         if (aa == 0 && end[0] != '\0') /* string entered */
1194         {
1195             aa = find_group(s, ngrps, grpname);
1196         }
1197         bInRange = (aa >= 0 && aa < ngrps);
1198         if (!bInRange)
1199         {
1200             printf("Error: No such group '%s'\n", s);
1201         }
1202     }
1203     while (!bInRange);
1204     printf("Selected %d: '%s'\n", aa, grpname[aa]);
1205     *a = aa;
1206     return aa;
1207 }
1208
1209 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
1210                       int ngrps, int isize[], atom_id *index[], int grpnr[])
1211 {
1212     int i, j, gnr1;
1213
1214     if (grps->nr == 0)
1215     {
1216         gmx_fatal(FARGS, "Error: no groups in indexfile");
1217     }
1218     for (i = 0; (i < grps->nr); i++)
1219     {
1220         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
1221                 grps->index[i+1]-grps->index[i]);
1222     }
1223     for (i = 0; (i < ngrps); i++)
1224     {
1225         if (grps->nr > 1)
1226         {
1227             do
1228             {
1229                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
1230                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
1231                 {
1232                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
1233                 }
1234             }
1235             while ((gnr1 < 0) || (gnr1 >= grps->nr));
1236         }
1237         else
1238         {
1239             fprintf(stderr, "There is one group in the index\n");
1240             gnr1 = 0;
1241         }
1242         gnames[i] = strdup(grpname[gnr1]);
1243         isize[i]  = grps->index[gnr1+1]-grps->index[gnr1];
1244         snew(index[i], isize[i]);
1245         for (j = 0; (j < isize[i]); j++)
1246         {
1247             index[i][j] = grps->a[grps->index[gnr1]+j];
1248         }
1249     }
1250 }
1251
1252 void rd_index(const char *statfile, int ngrps, int isize[],
1253               atom_id *index[], char *grpnames[])
1254 {
1255     char    **gnames;
1256     t_blocka *grps;
1257     int      *grpnr;
1258
1259     snew(grpnr, ngrps);
1260     if (!statfile)
1261     {
1262         gmx_fatal(FARGS, "No index file specified");
1263     }
1264     grps = init_index(statfile, &gnames);
1265     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1266 }
1267
1268 void rd_index_nrs(char *statfile, int ngrps, int isize[],
1269                   atom_id *index[], char *grpnames[], int grpnr[])
1270 {
1271     char    **gnames;
1272     t_blocka *grps;
1273
1274     if (!statfile)
1275     {
1276         gmx_fatal(FARGS, "No index file specified");
1277     }
1278     grps = init_index(statfile, &gnames);
1279
1280     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1281 }
1282
1283 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1284                int isize[], atom_id *index[], char *grpnames[])
1285 {
1286     char    ***gnames;
1287     t_blocka  *grps = NULL;
1288     int       *grpnr;
1289
1290     snew(grpnr, ngrps);
1291     snew(gnames, 1);
1292     if (fnm != NULL)
1293     {
1294         grps = init_index(fnm, gnames);
1295     }
1296     else if (atoms)
1297     {
1298         snew(grps, 1);
1299         snew(grps->index, 1);
1300         analyse(atoms, grps, gnames, FALSE, FALSE);
1301     }
1302     else
1303     {
1304         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1305     }
1306
1307     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1308 }
1309
1310 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1311 {
1312     t_cluster_ndx *c;
1313     int            i;
1314
1315     snew(c, 1);
1316     c->clust     = init_index(ndx, &c->grpname);
1317     c->maxframe  = -1;
1318     for (i = 0; (i < c->clust->nra); i++)
1319     {
1320         c->maxframe = max(c->maxframe, c->clust->a[i]);
1321     }
1322     fprintf(fplog ? fplog : stdout,
1323             "There are %d clusters containing %d structures, highest framenr is %d\n",
1324             c->clust->nr, c->clust->nra, c->maxframe);
1325     if (debug)
1326     {
1327         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1328         for (i = 0; (i < c->clust->nra); i++)
1329         {
1330             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1331             {
1332                 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1333                           "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1334             }
1335         }
1336     }
1337     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1338
1339     return c;
1340 }