src/gromacs/utility/cstringutil.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /* This file is completely threadsafe - keep it that way! */
  38 #include "gmxpre.h"
  39
  40 #include "cstringutil.h"
  41
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44
  45 #include <cassert>
  46 #include <cctype>
  47 #include <cstring>
  48
  49 #include "gromacs/utility/basedefinitions.h"
  50 #include "gromacs/utility/fatalerror.h"
  51 #include "gromacs/utility/futil.h"
  52 #include "gromacs/utility/smalloc.h"
  53 #include "gromacs/utility/sysinfo.h"
  54
  55 int continuing(char *s)
  56 {
  57     int sl;
  58     assert(s);
  59
  60     rtrim(s);
  61     sl = strlen(s);
  62     if ((sl > 0) && (s[sl-1] == CONTINUE))
  63     {
  64         s[sl-1] = 0;
  65         return TRUE;
  66     }
  67     else
  68     {
  69         return FALSE;
  70     }
  71 }
  72
  73
  74
  75 char *fgets2(char *line, int n, FILE *stream)
  76 {
  77     char *c;
  78     if (fgets(line, n, stream) == NULL)
  79     {
  80         return NULL;
  81     }
  82     if ((c = strchr(line, '\n')) != NULL)
  83     {
  84         *c = '\0';
  85     }
  86     else
  87     {
  88         /* A line not ending in a newline can only occur at the end of a file,
  89          * or because of n being too small.
  90          * Since both cases occur very infrequently, we can check for EOF.
  91          */
  92         if (!feof(stream))
  93         {
  94             gmx_fatal(FARGS, "An input file contains a line longer than %d characters, while the buffer passed to fgets2 has size %d. The line starts with: '%20.20s'", n, n, line);
  95         }
  96     }
  97     if ((c = strchr(line, '\r')) != NULL)
  98     {
  99         *c = '\0';
 100     }
 101
 102     return line;
 103 }
 104
 105 void strip_comment (char *line)
 106 {
 107     char *c;
 108
 109     if (!line)
 110     {
 111         return;
 112     }
 113
 114     /* search for a comment mark and replace it by a zero */
 115     if ((c = strchr(line, COMMENTSIGN)) != NULL)
 116     {
 117         (*c) = 0;
 118     }
 119 }
 120
 121 void upstring (char *str)
 122 {
 123     int i;
 124
 125     for (i = 0; (i < (int)strlen(str)); i++)
 126     {
 127         str[i] = toupper(str[i]);
 128     }
 129 }
 130
 131 void ltrim (char *str)
 132 {
 133     int   i, c;
 134
 135     if (NULL == str)
 136     {
 137         return;
 138     }
 139
 140     c = 0;
 141     while (('\0' != str[c]) && isspace(str[c]))
 142     {
 143         c++;
 144     }
 145     if (c > 0)
 146     {
 147         for (i = c; ('\0' != str[i]); i++)
 148         {
 149             str[i-c] = str[i];
 150         }
 151         str[i-c] = '\0';
 152     }
 153 }
 154
 155 void rtrim (char *str)
 156 {
 157     int nul;
 158
 159     if (NULL == str)
 160     {
 161         return;
 162     }
 163
 164     nul = strlen(str)-1;
 165     while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')) )
 166     {
 167         str[nul] = '\0';
 168         nul--;
 169     }
 170 }
 171
 172 void trim (char *str)
 173 {
 174     ltrim (str);
 175     rtrim (str);
 176 }
 177
 178 void nice_header(FILE *out, const char *fn)
 179 {
 180     int            uid;
 181     char           userbuf[256];
 182     char           hostbuf[256];
 183     char           timebuf[STRLEN];
 184
 185     /* Print a nice header above the file */
 186     fprintf(out, "%c\n", COMMENTSIGN);
 187     fprintf(out, "%c\tFile '%s' was generated\n", COMMENTSIGN, fn ? fn : "unknown");
 188
 189     uid  = gmx_getuid();
 190     gmx_getusername(userbuf, 256);
 191     gmx_gethostname(hostbuf, 256);
 192     gmx_format_current_time(timebuf, STRLEN);
 193
 194     fprintf(out, "%c\tBy user: %s (%d)\n", COMMENTSIGN, userbuf, uid);
 195     fprintf(out, "%c\tOn host: %s\n", COMMENTSIGN, hostbuf);
 196     fprintf(out, "%c\tAt date: %s\n", COMMENTSIGN, timebuf);
 197     fprintf(out, "%c\n", COMMENTSIGN);
 198 }
 199
 200 int gmx_strcasecmp_min(const char *str1, const char *str2)
 201 {
 202     char ch1, ch2;
 203
 204     do
 205     {
 206         do
 207         {
 208             ch1 = toupper(*(str1++));
 209         }
 210         while ((ch1 == '-') || (ch1 == '_'));
 211         do
 212         {
 213             ch2 = toupper(*(str2++));
 214         }
 215         while ((ch2 == '-') || (ch2 == '_'));
 216
 217         if (ch1 != ch2)
 218         {
 219             return (ch1-ch2);
 220         }
 221     }
 222     while (ch1);
 223     return 0;
 224 }
 225
 226 int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
 227 {
 228     char  ch1, ch2;
 229     char *stri1, *stri2;
 230
 231     stri1 = (char *)str1;
 232     stri2 = (char *)str2;
 233     do
 234     {
 235         do
 236         {
 237             ch1 = toupper(*(str1++));
 238         }
 239         while ((ch1 == '-') || (ch1 == '_'));
 240         do
 241         {
 242             ch2 = toupper(*(str2++));
 243         }
 244         while ((ch2 == '-') || (ch2 == '_'));
 245
 246         if (ch1 != ch2)
 247         {
 248             return (ch1-ch2);
 249         }
 250     }
 251     while (ch1 && (str1-stri1 < n) && (str2-stri2 < n));
 252     return 0;
 253 }
 254
 255 int gmx_strcasecmp(const char *str1, const char *str2)
 256 {
 257     char ch1, ch2;
 258
 259     do
 260     {
 261         ch1 = toupper(*(str1++));
 262         ch2 = toupper(*(str2++));
 263         if (ch1 != ch2)
 264         {
 265             return (ch1-ch2);
 266         }
 267     }
 268     while (ch1);
 269     return 0;
 270 }
 271
 272 int gmx_strncasecmp(const char *str1, const char *str2, int n)
 273 {
 274     char ch1, ch2;
 275
 276     if (n == 0)
 277     {
 278         return 0;
 279     }
 280
 281     do
 282     {
 283         ch1 = toupper(*(str1++));
 284         ch2 = toupper(*(str2++));
 285         if (ch1 != ch2)
 286         {
 287             return (ch1-ch2);
 288         }
 289         n--;
 290     }
 291     while (ch1 && n);
 292     return 0;
 293 }
 294
 295 char *gmx_strdup(const char *src)
 296 {
 297     char *dest;
 298
 299     snew(dest, strlen(src)+1);
 300     strcpy(dest, src);
 301
 302     return dest;
 303 }
 304
 305 char *
 306 gmx_strndup(const char *src, int n)
 307 {
 308     int   len;
 309     char *dest;
 310
 311     len = strlen(src);
 312     if (len > n)
 313     {
 314         len = n;
 315     }
 316     snew(dest, len+1);
 317     strncpy(dest, src, len);
 318     dest[len] = 0;
 319     return dest;
 320 }
 321
 322 /* Magic hash init number for Dan J. Bernsteins algorithm.
 323  * Do NOT use any other value unless you really know what you are doing.
 324  */
 325 const unsigned int
 326     gmx_string_hash_init = 5381;
 327
 328
 329 unsigned int
 330 gmx_string_fullhash_func(const char *s, unsigned int hash_init)
 331 {
 332     int c;
 333
 334     while ((c = (*s++)) != '\0')
 335     {
 336         hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 337     }
 338     return hash_init;
 339 }
 340
 341 unsigned int
 342 gmx_string_hash_func(const char *s, unsigned int hash_init)
 343 {
 344     int c;
 345
 346     while ((c = toupper(*s++)) != '\0')
 347     {
 348         if (isalnum(c))
 349         {
 350             hash_init = ((hash_init << 5) + hash_init) ^ c;            /* (hash * 33) xor c */
 351         }
 352     }
 353     return hash_init;
 354 }
 355
 356 int
 357 gmx_wcmatch(const char *pattern, const char *str)
 358 {
 359     while (*pattern)
 360     {
 361         if (*pattern == '*')
 362         {
 363             /* Skip multiple wildcards in a sequence */
 364             while (*pattern == '*' || *pattern == '?')
 365             {
 366                 ++pattern;
 367                 /* For ?, we need to check that there are characters left
 368                  * in str. */
 369                 if (*pattern == '?')
 370                 {
 371                     if (*str == 0)
 372                     {
 373                         return GMX_NO_WCMATCH;
 374                     }
 375                     else
 376                     {
 377                         ++str;
 378                     }
 379                 }
 380             }
 381             /* If the pattern ends after the star, we have a match */
 382             if (*pattern == 0)
 383             {
 384                 return 0;
 385             }
 386             /* Match the rest against each possible suffix of str */
 387             while (*str)
 388             {
 389                 /* Only do the recursive call if the first character
 390                  * matches. We don't have to worry about wildcards here,
 391                  * since we have processed them above. */
 392                 if (*pattern == *str)
 393                 {
 394                     int rc;
 395                     /* Match the suffix, and return if a match or an error */
 396                     rc = gmx_wcmatch(pattern, str);
 397                     if (rc != GMX_NO_WCMATCH)
 398                     {
 399                         return rc;
 400                     }
 401                 }
 402                 ++str;
 403             }
 404             /* If no suffix of str matches, we don't have a match */
 405             return GMX_NO_WCMATCH;
 406         }
 407         else if ((*pattern == '?' && *str != 0) || *pattern == *str)
 408         {
 409             ++str;
 410         }
 411         else
 412         {
 413             return GMX_NO_WCMATCH;
 414         }
 415         ++pattern;
 416     }
 417     /* When the pattern runs out, we have a match if the string has ended. */
 418     return (*str == 0) ? 0 : GMX_NO_WCMATCH;
 419 }
 420
 421 char *wrap_lines(const char *buf, int line_width, int indent, gmx_bool bIndentFirst)
 422 {
 423     char    *b2;
 424     int      i, i0, i2, j, b2len, lspace = 0, l2space = 0;
 425     gmx_bool bFirst, bFitsOnLine;
 426
 427     /* characters are copied from buf to b2 with possible spaces changed
 428      * into newlines and extra space added for indentation.
 429      * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
 430      * i0 points to the beginning of the current line (in buf, source)
 431      * lspace and l2space point to the last space on the current line
 432      * bFirst is set to prevent indentation of first line
 433      * bFitsOnLine says if the first space occurred before line_width, if
 434      * that is not the case, we have a word longer than line_width which
 435      * will also not fit on the next line, so we might as well keep it on
 436      * the current line (where it also won't fit, but looks better)
 437      */
 438
 439     b2    = NULL;
 440     b2len = strlen(buf)+1+indent;
 441     snew(b2, b2len);
 442     i0 = i2 = 0;
 443     if (bIndentFirst)
 444     {
 445         for (i2 = 0; (i2 < indent); i2++)
 446         {
 447             b2[i2] = ' ';
 448         }
 449     }
 450     bFirst = TRUE;
 451     do
 452     {
 453         l2space = -1;
 454         /* find the last space before end of line */
 455         for (i = i0; ((i-i0 < line_width) || (l2space == -1)) && (buf[i]); i++)
 456         {
 457             b2[i2++] = buf[i];
 458             /* remember the position of a space */
 459             if (buf[i] == ' ')
 460             {
 461                 lspace  = i;
 462                 l2space = i2-1;
 463             }
 464             /* if we have a newline before the line is full, reset counters */
 465             if (buf[i] == '\n' && buf[i+1])
 466             {
 467                 i0     = i+1;
 468                 b2len += indent;
 469                 srenew(b2, b2len);
 470                 /* add indentation after the newline */
 471                 for (j = 0; (j < indent); j++)
 472                 {
 473                     b2[i2++] = ' ';
 474                 }
 475             }
 476         }
 477         /* If we are at the last newline, copy it */
 478         if (buf[i] == '\n' && !buf[i+1])
 479         {
 480             b2[i2++] = buf[i++];
 481         }
 482         /* if we're not at the end of the string */
 483         if (buf[i])
 484         {
 485             /* check if one word does not fit on the line */
 486             bFitsOnLine = (i-i0 <= line_width);
 487             /* reset line counters to just after the space */
 488             i0 = lspace+1;
 489             i2 = l2space+1;
 490             /* if the words fit on the line, and we're beyond the indentation part */
 491             if ( (bFitsOnLine) && (l2space >= indent) )
 492             {
 493                 /* start a new line */
 494                 b2[l2space] = '\n';
 495                 /* and add indentation */
 496                 if (indent)
 497                 {
 498                     if (bFirst)
 499                     {
 500                         line_width -= indent;
 501                         bFirst      = FALSE;
 502                     }
 503                     b2len += indent;
 504                     srenew(b2, b2len);
 505                     for (j = 0; (j < indent); j++)
 506                     {
 507                         b2[i2++] = ' ';
 508                     }
 509                     /* no extra spaces after indent; */
 510                     while (buf[i0] == ' ')
 511                     {
 512                         i0++;
 513                     }
 514                 }
 515             }
 516         }
 517     }
 518     while (buf[i]);
 519     b2[i2] = '\0';
 520
 521     return b2;
 522 }
 523
 524 gmx_int64_t
 525 str_to_int64_t(const char *str, char **endptr)
 526 {
 527 #ifndef _MSC_VER
 528     return strtoll(str, endptr, 10);
 529 #else
 530     return _strtoi64(str, endptr, 10);
 531 #endif
 532 }
 533
 534 char *gmx_step_str(gmx_int64_t i, char *buf)
 535 {
 536     sprintf(buf, "%" GMX_PRId64, i);
 537     return buf;
 538 }
 539
 540 void parse_digits_from_plain_string(const char *digitstring, int *ndigits, int **digitlist)
 541 {
 542     int i;
 543
 544     if (NULL == digitstring)
 545     {
 546         *ndigits   = 0;
 547         *digitlist = NULL;
 548         return;
 549     }
 550
 551     *ndigits = strlen(digitstring);
 552
 553     snew(*digitlist, *ndigits);
 554
 555     for (i = 0; i < *ndigits; i++)
 556     {
 557         if (digitstring[i] < '0' || digitstring[i] > '9')
 558         {
 559             gmx_fatal(FARGS, "Invalid character in digit-only string: '%c'\n",
 560                       digitstring[i]);
 561         }
 562         (*digitlist)[i] = digitstring[i] - '0';
 563     }
 564 }
 565
 566 static void parse_digits_from_csv_string(const char gmx_unused *digitstring, int gmx_unused *ndigits, int gmx_unused *digitlist)
 567 {
 568     /* TODO Implement csv format to support (e.g.) more than 10
 569        different GPUs in a node. */
 570     gmx_incons("Not implemented yet");
 571 }