src/gmxlib/string2.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /* This file is completely threadsafe - keep it that way! */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "visibility.h"

#ifdef GMX_CRAY_XT3
#undef HAVE_PWD_H
#endif

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <time.h>

#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif


#ifdef HAVE_PWD_H
#include <pwd.h>
#endif
#include <time.h>
#include <assert.h>

#include "typedefs.h"
#include "smalloc.h"
#include "gmx_fatal.h"
#include "macros.h"
#include "string2.h"
#include "futil.h"
  72
  73 int continuing(char *s)
  74 /* strip trailing spaces and if s ends with a CONTINUE remove that too.
  75  * returns TRUE if s ends with a CONTINUE, FALSE otherwise.
  76  */
  77 {
  78     int sl;
  79     assert(s);
  80
  81     rtrim(s);
  82     sl = strlen(s);
  83     if ((sl > 0) && (s[sl-1] == CONTINUE))
  84     {
  85         s[sl-1] = 0;
  86         return TRUE;
  87     }
  88     else
  89     {
  90         return FALSE;
  91     }
  92 }
  93
  94
  95
  96 char *fgets2(char *line, int n, FILE *stream)
  97 /* This routine reads a string from stream of max length n
  98  * and zero terminated, without newlines
  99  * line should be long enough (>= n)
 100  */
 101 {
 102     char *c;
 103     if (fgets(line, n, stream) == NULL)
 104     {
 105         return NULL;
 106     }
 107     if ((c = strchr(line, '\n')) != NULL)
 108     {
 109         *c = '\0';
 110     }
 111     else
 112     {
 113         /* A line not ending in a newline can only occur at the end of a file,
 114          * or because of n being too small.
 115          * Since both cases occur very infrequently, we can check for EOF.
 116          */
 117         if (!gmx_eof(stream))
 118         {
 119             gmx_fatal(FARGS, "An input file contains a line longer than %d characters, while the buffer passed to fgets2 has size %d. The line starts with: '%20.20s'", n, n, line);
 120         }
 121     }
 122     if ((c = strchr(line, '\r')) != NULL)
 123     {
 124         *c = '\0';
 125     }
 126
 127     return line;
 128 }
 129
 130 void strip_comment (char *line)
 131 {
 132     char *c;
 133
 134     if (!line)
 135     {
 136         return;
 137     }
 138
 139     /* search for a comment mark and replace it by a zero */
 140     if ((c = strchr(line, COMMENTSIGN)) != NULL)
 141     {
 142         (*c) = 0;
 143     }
 144 }
 145
 146 void upstring (char *str)
 147 {
 148     int i;
 149
 150     for (i = 0; (i < (int)strlen(str)); i++)
 151     {
 152         str[i] = toupper(str[i]);
 153     }
 154 }
 155
 156 void ltrim (char *str)
 157 {
 158     char *tr;
 159     int   i, c;
 160
 161     if (NULL == str)
 162     {
 163         return;
 164     }
 165
 166     c = 0;
 167     while (('\0' != str[c]) && isspace(str[c]))
 168     {
 169         c++;
 170     }
 171     if (c > 0)
 172     {
 173         for (i = c; ('\0' != str[i]); i++)
 174         {
 175             str[i-c] = str[i];
 176         }
 177         str[i-c] = '\0';
 178     }
 179 }
 180
 181 void rtrim (char *str)
 182 {
 183     int nul;
 184
 185     if (NULL == str)
 186     {
 187         return;
 188     }
 189
 190     nul = strlen(str)-1;
 191     while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')) )
 192     {
 193         str[nul] = '\0';
 194         nul--;
 195     }
 196 }
 197
 198 void trim (char *str)
 199 {
 200     ltrim (str);
 201     rtrim (str);
 202 }
 203
 204 GMX_LIBGMX_EXPORT
 205 char *
 206 gmx_ctime_r(const time_t *clock, char *buf, int n)
 207 {
 208     char tmpbuf[STRLEN];
 209
 210 #ifdef GMX_NATIVE_WINDOWS
 211     /* Windows */
 212     ctime_s( tmpbuf, STRLEN, clock );
 213 #elif (defined(__sun))
 214     /*Solaris*/
 215     ctime_r(clock, tmpbuf, n);
 216 #else
 217     ctime_r(clock, tmpbuf);
 218 #endif
 219     strncpy(buf, tmpbuf, n-1);
 220     buf[n-1] = '\0';
 221
 222     return buf;
 223 }
 224
 225 void nice_header (FILE *out, const char *fn)
 226 {
 227     const char    *unk = "onbekend";
 228     time_t         clock;
 229     const char    *user = unk;
 230     int            gh;
 231     uid_t          uid;
 232     char           buf[256] = "";
 233     char           timebuf[STRLEN];
 234 #ifdef HAVE_PWD_H
 235     struct passwd *pw;
 236 #endif
 237
 238     /* Print a nice header above the file */
 239     time(&clock);
 240     fprintf (out, "%c\n", COMMENTSIGN);
 241     fprintf (out, "%c\tFile '%s' was generated\n", COMMENTSIGN, fn ? fn : unk);
 242
 243 #ifdef HAVE_PWD_H
 244     uid  = getuid();
 245     pw   = getpwuid(uid);
 246     gh   = gethostname(buf, 255);
 247     /* pw returns null on error (e.g. compute nodes lack /etc/passwd) */
 248     user = pw ? pw->pw_name : unk;
 249 #else
 250     uid = 0;
 251     gh  = -1;
 252 #endif
 253
 254     gmx_ctime_r(&clock, timebuf, STRLEN);
 255     fprintf (out, "%c\tBy user: %s (%d)\n", COMMENTSIGN,
 256              user ? user : unk, (int) uid);
 257     fprintf(out, "%c\tOn host: %s\n", COMMENTSIGN, (gh == 0) ? buf : unk);
 258
 259     fprintf (out, "%c\tAt date: %s", COMMENTSIGN, timebuf);
 260     fprintf (out, "%c\n", COMMENTSIGN);
 261 }
 262
 263
 264 int gmx_strcasecmp_min(const char *str1, const char *str2)
 265 {
 266     char ch1, ch2;
 267
 268     do
 269     {
 270         do
 271         {
 272             ch1 = toupper(*(str1++));
 273         }
 274         while ((ch1 == '-') || (ch1 == '_'));
 275         do
 276         {
 277             ch2 = toupper(*(str2++));
 278         }
 279         while ((ch2 == '-') || (ch2 == '_'));
 280
 281         if (ch1 != ch2)
 282         {
 283             return (ch1-ch2);
 284         }
 285     }
 286     while (ch1);
 287     return 0;
 288 }
 289
 290 int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
 291 {
 292     char  ch1, ch2;
 293     char *stri1, *stri2;
 294
 295     stri1 = (char *)str1;
 296     stri2 = (char *)str2;
 297     do
 298     {
 299         do
 300         {
 301             ch1 = toupper(*(str1++));
 302         }
 303         while ((ch1 == '-') || (ch1 == '_'));
 304         do
 305         {
 306             ch2 = toupper(*(str2++));
 307         }
 308         while ((ch2 == '-') || (ch2 == '_'));
 309
 310         if (ch1 != ch2)
 311         {
 312             return (ch1-ch2);
 313         }
 314     }
 315     while (ch1 && (str1-stri1 < n) && (str2-stri2 < n));
 316     return 0;
 317 }
 318
 319 int gmx_strcasecmp(const char *str1, const char *str2)
 320 {
 321     char ch1, ch2;
 322
 323     do
 324     {
 325         ch1 = toupper(*(str1++));
 326         ch2 = toupper(*(str2++));
 327         if (ch1 != ch2)
 328         {
 329             return (ch1-ch2);
 330         }
 331     }
 332     while (ch1);
 333     return 0;
 334 }
 335
 336 int gmx_strncasecmp(const char *str1, const char *str2, int n)
 337 {
 338     char ch1, ch2;
 339
 340     if (n == 0)
 341     {
 342         return 0;
 343     }
 344
 345     do
 346     {
 347         ch1 = toupper(*(str1++));
 348         ch2 = toupper(*(str2++));
 349         if (ch1 != ch2)
 350         {
 351             return (ch1-ch2);
 352         }
 353         n--;
 354     }
 355     while (ch1 && n);
 356     return 0;
 357 }
 358
 359 char *gmx_strdup(const char *src)
 360 {
 361     char *dest;
 362
 363     snew(dest, strlen(src)+1);
 364     strcpy(dest, src);
 365
 366     return dest;
 367 }
 368
 369 char *
 370 gmx_strndup(const char *src, int n)
 371 {
 372     int   len;
 373     char *dest;
 374
 375     len = strlen(src);
 376     if (len > n)
 377     {
 378         len = n;
 379     }
 380     snew(dest, len+1);
 381     strncpy(dest, src, len);
 382     dest[len] = 0;
 383     return dest;
 384 }
 385
/* Magic hash init number for Dan J. Bernstein's algorithm.
 * Do NOT use any other value unless you really know what you are doing.
 */
const unsigned int
    gmx_string_hash_init = 5381;
 391
 392
 393 unsigned int
 394 gmx_string_hash_func(const char *s, unsigned int hash_init)
 395 {
 396     int c;
 397
 398     while ((c = toupper(*s++)) != '\0')
 399     {
 400         if (isalnum(c))
 401         {
 402             hash_init = ((hash_init << 5) + hash_init) ^ c;            /* (hash * 33) xor c */
 403         }
 404     }
 405     return hash_init;
 406 }
 407
 408 /*!
 409  * \param[in] pattern  Pattern to match against.
 410  * \param[in] str      String to match.
 411  * \returns   0 on match, GMX_NO_WCMATCH if there is no match.
 412  *
 413  * Matches \p str against \p pattern, which may contain * and ? wildcards.
 414  * All other characters are matched literally.
 415  * Currently, it is not possible to match literal * or ?.
 416  */
 417 int
 418 gmx_wcmatch(const char *pattern, const char *str)
 419 {
 420     while (*pattern)
 421     {
 422         if (*pattern == '*')
 423         {
 424             /* Skip multiple wildcards in a sequence */
 425             while (*pattern == '*' || *pattern == '?')
 426             {
 427                 ++pattern;
 428                 /* For ?, we need to check that there are characters left
 429                  * in str. */
 430                 if (*pattern == '?')
 431                 {
 432                     if (*str == 0)
 433                     {
 434                         return GMX_NO_WCMATCH;
 435                     }
 436                     else
 437                     {
 438                         ++str;
 439                     }
 440                 }
 441             }
 442             /* If the pattern ends after the star, we have a match */
 443             if (*pattern == 0)
 444             {
 445                 return 0;
 446             }
 447             /* Match the rest against each possible suffix of str */
 448             while (*str)
 449             {
 450                 /* Only do the recursive call if the first character
 451                  * matches. We don't have to worry about wildcards here,
 452                  * since we have processed them above. */
 453                 if (*pattern == *str)
 454                 {
 455                     int rc;
 456                     /* Match the suffix, and return if a match or an error */
 457                     rc = gmx_wcmatch(pattern, str);
 458                     if (rc != GMX_NO_WCMATCH)
 459                     {
 460                         return rc;
 461                     }
 462                 }
 463                 ++str;
 464             }
 465             /* If no suffix of str matches, we don't have a match */
 466             return GMX_NO_WCMATCH;
 467         }
 468         else if ((*pattern == '?' && *str != 0) || *pattern == *str)
 469         {
 470             ++str;
 471         }
 472         else
 473         {
 474             return GMX_NO_WCMATCH;
 475         }
 476         ++pattern;
 477     }
 478     /* When the pattern runs out, we have a match if the string has ended. */
 479     return (*str == 0) ? 0 : GMX_NO_WCMATCH;
 480 }
 481
/* Returns a copy of buf in which spaces have been turned into newlines so
 * that lines are wrapped at about line_width characters, with indent spaces
 * inserted after each line break.
 *
 * buf           NUL-terminated text to wrap
 * line_width    target width of a line; after the first wrap with a nonzero
 *               indent it is reduced by indent for the remaining lines
 * indent        number of spaces inserted after each newline (both inserted
 *               newlines and newlines already in buf that are not the last
 *               character)
 * bIndentFirst  when set, the result also starts with indent spaces
 *
 * The result is allocated with snew() and grown with srenew(); presumably the
 * caller releases it with the matching free routine - TODO confirm.
 */
char *wrap_lines(const char *buf, int line_width, int indent, gmx_bool bIndentFirst)
{
    char    *b2;
    int      i, i0, i2, j, b2len, lspace = 0, l2space = 0;
    gmx_bool bFirst, bFitsOnLine;

    /* characters are copied from buf to b2 with possible spaces changed
     * into newlines and extra space added for indentation.
     * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
     * i0 points to the beginning of the current line (in buf, source)
     * lspace and l2space point to the last space on the current line
     * (lspace in buf, l2space in b2)
     * bFirst is TRUE until the first wrap with indentation has been done;
     * at that point line_width is reduced by indent, once
     * bFitsOnLine says if the first space occurred before line_width, if
     * that is not the case, we have a word longer than line_width which
     * will also not fit on the next line, so we might as well keep it on
     * the current line (where it also won't fit, but looks better)
     */

    b2    = NULL;
    /* room for the text, the terminator and one leading indentation */
    b2len = strlen(buf)+1+indent;
    snew(b2, b2len);
    i0 = i2 = 0;
    if (bIndentFirst)
    {
        for (i2 = 0; (i2 < indent); i2++)
        {
            b2[i2] = ' ';
        }
    }
    bFirst = TRUE;
    do
    {
        /* -1 means: no space seen yet on this line */
        l2space = -1;
        /* find the last space before end of line; keep going past
         * line_width as long as no space has been seen at all */
        for (i = i0; ((i-i0 < line_width) || (l2space == -1)) && (buf[i]); i++)
        {
            b2[i2++] = buf[i];
            /* remember the position of a space */
            if (buf[i] == ' ')
            {
                lspace  = i;
                l2space = i2-1;
            }
            /* if we have a newline before the line is full, reset counters */
            if (buf[i] == '\n' && buf[i+1])
            {
                i0     = i+1;
                /* grow the destination by the indentation about to be added */
                b2len += indent;
                srenew(b2, b2len);
                /* add indentation after the newline */
                for (j = 0; (j < indent); j++)
                {
                    b2[i2++] = ' ';
                }
            }
        }
        /* If we are at the last newline, copy it */
        if (buf[i] == '\n' && !buf[i+1])
        {
            b2[i2++] = buf[i++];
        }
        /* if we're not at the end of the string */
        if (buf[i])
        {
            /* check if one word does not fit on the line */
            bFitsOnLine = (i-i0 <= line_width);
            /* reset line counters to just after the space; whatever was
             * copied to b2 beyond that space will be copied again */
            i0 = lspace+1;
            i2 = l2space+1;
            /* if the words fit on the line, and we're beyond the indentation part */
            if ( (bFitsOnLine) && (l2space >= indent) )
            {
                /* start a new line by overwriting the space */
                b2[l2space] = '\n';
                /* and add indentation */
                if (indent)
                {
                    if (bFirst)
                    {
                        line_width -= indent;
                        bFirst      = FALSE;
                    }
                    b2len += indent;
                    srenew(b2, b2len);
                    for (j = 0; (j < indent); j++)
                    {
                        b2[i2++] = ' ';
                    }
                    /* no extra spaces after indent; */
                    while (buf[i0] == ' ')
                    {
                        i0++;
                    }
                }
            }
        }
    }
    while (buf[i]);
    b2[i2] = '\0';

    return b2;
}
 584
 585 char **split(char sep, char *str)
 586 {
 587     char **ptr = NULL;
 588     int    n, nn, nptr = 0;
 589
 590     if (str == NULL)
 591     {
 592         return NULL;
 593     }
 594     nn = strlen(str);
 595     for (n = 0; (n < nn); n++)
 596     {
 597         if (str[n] == sep)
 598         {
 599             nptr++;
 600         }
 601     }
 602     snew(ptr, nptr+2);
 603     nptr = 0;
 604     while (*str != '\0')
 605     {
 606         while ((*str != '\0') && (*str == sep))
 607         {
 608             str++;
 609         }
 610         if (*str != '\0')
 611         {
 612             snew(ptr[nptr], 1+strlen(str));
 613             n = 0;
 614             while ((*str != '\0') && (*str != sep))
 615             {
 616                 ptr[nptr][n] = *str;
 617                 str++;
 618                 n++;
 619             }
 620             ptr[nptr][n] = '\0';
 621             nptr++;
 622         }
 623     }
 624     ptr[nptr] = NULL;
 625
 626     return ptr;
 627 }
 628
 629
 630 gmx_large_int_t
 631 str_to_large_int_t(const char *str, char **endptr)
 632 {
 633     int              sign = 1;
 634     gmx_large_int_t  val  = 0;
 635     char             ch;
 636     const char      *p;
 637
 638     p = str;
 639     if (p == NULL)
 640     {
 641         *endptr = NULL;
 642         return 0;
 643     }
 644
 645     /* Strip off initial white space */
 646     while (isspace(*p))
 647     {
 648         p++;
 649     }
 650     /* Conform to ISO C99 - return original pointer if string does not contain a number */
 651     if (*str == '\0')
 652     {
 653         *endptr = (char *)str;
 654     }
 655
 656     if (*p == '-')
 657     {
 658         p++;
 659         sign *= -1;
 660     }
 661
 662     while ( ((ch = *p) != '\0') && isdigit(ch) )
 663     {
 664         /* Important to add sign here, so we dont overflow in final multiplication */
 665         ch  = (ch-'0')*sign;
 666         val = val*10 + ch;
 667         if (ch != val%10)
 668         {
 669             /* Some sort of overflow has occured, set endptr to original string */
 670             *endptr = (char *)str;
 671             errno   = ERANGE;
 672             return(0);
 673         }
 674         p++;
 675     }
 676
 677     *endptr = (char *)p;
 678
 679     return val;
 680 }
 681
 682 char *gmx_strsep(char **stringp, const char *delim)
 683 {
 684     char *ret;
 685     int   len = strlen(delim);
 686     int   i, j = 0;
 687     int   found = 0;
 688
 689     if (!*stringp)
 690     {
 691         return NULL;
 692     }
 693     ret = *stringp;
 694     do
 695     {
 696         if ( (*stringp)[j] == '\0')
 697         {
 698             found    = 1;
 699             *stringp = NULL;
 700             break;
 701         }
 702         for (i = 0; i < len; i++)
 703         {
 704             if ( (*stringp)[j] == delim[i])
 705             {
 706                 (*stringp)[j] = '\0';
 707                 *stringp      = *stringp+j+1;
 708                 found         = 1;
 709                 break;
 710             }
 711         }
 712         j++;
 713     }
 714     while (!found);
 715
 716     return ret;
 717 }