src/gromacs/utility/cstringutil.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /* This file is completely threadsafe - keep it that way! */
  38 #include "gmxpre.h"
  39
  40 #include "cstringutil.h"
  41
  42 #include <cassert>
  43 #include <cctype>
  44 #include <cstdio>
  45 #include <cstdlib>
  46 #include <cstring>
  47
  48 #include <string>
  49 #include <vector>
  50
  51 #include "gromacs/utility/basedefinitions.h"
  52 #include "gromacs/utility/fatalerror.h"
  53 #include "gromacs/utility/futil.h"
  54 #include "gromacs/utility/smalloc.h"
  55
  56 //! Comment sign to use.
  57 #define COMMENTSIGN ';'
  58
  59 int continuing(char *s)
  60 {
  61     int sl;
  62     assert(s);
  63
  64     rtrim(s);
  65     sl = strlen(s);
  66     if ((sl > 0) && (s[sl-1] == CONTINUE))
  67     {
  68         s[sl-1] = 0;
  69         return TRUE;
  70     }
  71     else
  72     {
  73         return FALSE;
  74     }
  75 }
  76
  77
  78
  79 char *fgets2(char *line, int n, FILE *stream)
  80 {
  81     char *c;
  82     if (fgets(line, n, stream) == nullptr)
  83     {
  84         return nullptr;
  85     }
  86     if ((c = strchr(line, '\n')) != nullptr)
  87     {
  88         *c = '\0';
  89     }
  90     else
  91     {
  92         /* A line not ending in a newline can only occur at the end of a file,
  93          * or because of n being too small.
  94          * Since both cases occur very infrequently, we can check for EOF.
  95          */
  96         if (!feof(stream))
  97         {
  98             gmx_fatal(FARGS, "An input file contains a line longer than %d characters, while the buffer passed to fgets2 has size %d. The line starts with: '%20.20s'", n, n, line);
  99         }
 100     }
 101     if ((c = strchr(line, '\r')) != nullptr)
 102     {
 103         *c = '\0';
 104     }
 105
 106     return line;
 107 }
 108
 109 void strip_comment (char *line)
 110 {
 111     char *c;
 112
 113     if (!line)
 114     {
 115         return;
 116     }
 117
 118     /* search for a comment mark and replace it by a zero */
 119     if ((c = strchr(line, COMMENTSIGN)) != nullptr)
 120     {
 121         (*c) = 0;
 122     }
 123 }
 124
 125 void upstring (char *str)
 126 {
 127     int i;
 128
 129     for (i = 0; (i < static_cast<int>(strlen(str))); i++)
 130     {
 131         str[i] = toupper(str[i]);
 132     }
 133 }
 134
 135 void ltrim (char *str)
 136 {
 137     int   i, c;
 138
 139     if (nullptr == str)
 140     {
 141         return;
 142     }
 143
 144     c = 0;
 145     while (('\0' != str[c]) && isspace(str[c]))
 146     {
 147         c++;
 148     }
 149     if (c > 0)
 150     {
 151         for (i = c; ('\0' != str[i]); i++)
 152         {
 153             str[i-c] = str[i];
 154         }
 155         str[i-c] = '\0';
 156     }
 157 }
 158
 159 void rtrim (char *str)
 160 {
 161     int nul;
 162
 163     if (nullptr == str)
 164     {
 165         return;
 166     }
 167
 168     nul = strlen(str)-1;
 169     while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')) )
 170     {
 171         str[nul] = '\0';
 172         nul--;
 173     }
 174 }
 175
 176 void trim (char *str)
 177 {
 178     ltrim (str);
 179     rtrim (str);
 180 }
 181
 182 int gmx_strcasecmp_min(const char *str1, const char *str2)
 183 {
 184     char ch1, ch2;
 185
 186     do
 187     {
 188         do
 189         {
 190             ch1 = toupper(*(str1++));
 191         }
 192         while ((ch1 == '-') || (ch1 == '_'));
 193         do
 194         {
 195             ch2 = toupper(*(str2++));
 196         }
 197         while ((ch2 == '-') || (ch2 == '_'));
 198
 199         if (ch1 != ch2)
 200         {
 201             return (ch1-ch2);
 202         }
 203     }
 204     while (ch1);
 205     return 0;
 206 }
 207
 208 int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
 209 {
 210     char  ch1, ch2;
 211     char *stri1, *stri2;
 212
 213     stri1 = const_cast<char *>(str1);
 214     stri2 = const_cast<char *>(str2);
 215     do
 216     {
 217         do
 218         {
 219             ch1 = toupper(*(str1++));
 220         }
 221         while ((ch1 == '-') || (ch1 == '_'));
 222         do
 223         {
 224             ch2 = toupper(*(str2++));
 225         }
 226         while ((ch2 == '-') || (ch2 == '_'));
 227
 228         if (ch1 != ch2)
 229         {
 230             return (ch1-ch2);
 231         }
 232     }
 233     while (ch1 && (str1-stri1 < n) && (str2-stri2 < n));
 234     return 0;
 235 }
 236
 237 int gmx_strcasecmp(const char *str1, const char *str2)
 238 {
 239     char ch1, ch2;
 240
 241     do
 242     {
 243         ch1 = toupper(*(str1++));
 244         ch2 = toupper(*(str2++));
 245         if (ch1 != ch2)
 246         {
 247             return (ch1-ch2);
 248         }
 249     }
 250     while (ch1);
 251     return 0;
 252 }
 253
 254 int gmx_strncasecmp(const char *str1, const char *str2, int n)
 255 {
 256     char ch1, ch2;
 257
 258     if (n == 0)
 259     {
 260         return 0;
 261     }
 262
 263     do
 264     {
 265         ch1 = toupper(*(str1++));
 266         ch2 = toupper(*(str2++));
 267         if (ch1 != ch2)
 268         {
 269             return (ch1-ch2);
 270         }
 271         n--;
 272     }
 273     while (ch1 && n);
 274     return 0;
 275 }
 276
 277 char *gmx_strdup(const char *src)
 278 {
 279     char *dest;
 280
 281     snew(dest, strlen(src)+1);
 282     strcpy(dest, src);
 283
 284     return dest;
 285 }
 286
 287 char *
 288 gmx_strndup(const char *src, int n)
 289 {
 290     int   len;
 291     char *dest;
 292
 293     len = strlen(src);
 294     if (len > n)
 295     {
 296         len = n;
 297     }
 298     snew(dest, len+1);
 299     strncpy(dest, src, len);
 300     dest[len] = 0;
 301     return dest;
 302 }
 303
 304 /* Magic hash init number for Dan J. Bernsteins algorithm.
 305  * Do NOT use any other value unless you really know what you are doing.
 306  */
 307 const unsigned int
 308     gmx_string_hash_init = 5381;
 309
 310
 311 unsigned int
 312 gmx_string_fullhash_func(const char *s, unsigned int hash_init)
 313 {
 314     int c;
 315
 316     while ((c = (*s++)) != '\0')
 317     {
 318         hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 319     }
 320     return hash_init;
 321 }
 322
 323 unsigned int
 324 gmx_string_hash_func(const char *s, unsigned int hash_init)
 325 {
 326     int c;
 327
 328     while ((c = toupper(*s++)) != '\0')
 329     {
 330         if (isalnum(c))
 331         {
 332             hash_init = ((hash_init << 5) + hash_init) ^ c;            /* (hash * 33) xor c */
 333         }
 334     }
 335     return hash_init;
 336 }
 337
 338 int
 339 gmx_wcmatch(const char *pattern, const char *str)
 340 {
 341     while (*pattern)
 342     {
 343         if (*pattern == '*')
 344         {
 345             /* Skip multiple wildcards in a sequence */
 346             while (*pattern == '*' || *pattern == '?')
 347             {
 348                 ++pattern;
 349                 /* For ?, we need to check that there are characters left
 350                  * in str. */
 351                 if (*pattern == '?')
 352                 {
 353                     if (*str == 0)
 354                     {
 355                         return GMX_NO_WCMATCH;
 356                     }
 357                     else
 358                     {
 359                         ++str;
 360                     }
 361                 }
 362             }
 363             /* If the pattern ends after the star, we have a match */
 364             if (*pattern == 0)
 365             {
 366                 return 0;
 367             }
 368             /* Match the rest against each possible suffix of str */
 369             while (*str)
 370             {
 371                 /* Only do the recursive call if the first character
 372                  * matches. We don't have to worry about wildcards here,
 373                  * since we have processed them above. */
 374                 if (*pattern == *str)
 375                 {
 376                     int rc;
 377                     /* Match the suffix, and return if a match or an error */
 378                     rc = gmx_wcmatch(pattern, str);
 379                     if (rc != GMX_NO_WCMATCH)
 380                     {
 381                         return rc;
 382                     }
 383                 }
 384                 ++str;
 385             }
 386             /* If no suffix of str matches, we don't have a match */
 387             return GMX_NO_WCMATCH;
 388         }
 389         else if ((*pattern == '?' && *str != 0) || *pattern == *str)
 390         {
 391             ++str;
 392         }
 393         else
 394         {
 395             return GMX_NO_WCMATCH;
 396         }
 397         ++pattern;
 398     }
 399     /* When the pattern runs out, we have a match if the string has ended. */
 400     return (*str == 0) ? 0 : GMX_NO_WCMATCH;
 401 }
 402
/* Returns a newly allocated, zero-terminated copy of buf in which spaces
 * have been replaced by newlines to limit the line length, and in which
 * indent spaces are inserted after each line break.
 *
 * buf          text to wrap; it is only read.
 * line_width   number of source characters scanned per line before a break
 *              is attempted; it is reduced by indent after the first
 *              wrapped line when indent is non-zero.
 * indent       number of spaces inserted after every newline, both the
 *              newlines already present in buf and those added here.
 * bIndentFirst when set, the output also starts with indent spaces.
 *
 * The result is allocated with snew and grown with srenew.
 * NOTE(review): presumably the caller is expected to release it with
 * sfree - confirm against the header.
 */
char *wrap_lines(const char *buf, int line_width, int indent, gmx_bool bIndentFirst)
{
    char    *b2;
    int      i, i0, i2, j, b2len, lspace = 0, l2space = 0;
    gmx_bool bFirst, bFitsOnLine;

    /* characters are copied from buf to b2 with possible spaces changed
     * into newlines and extra space added for indentation.
     * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
     * i0 points to the beginning of the current line (in buf, source)
     * lspace and l2space point to the last space on the current line
     * bFirst is set to prevent indentation of first line
     * bFitsOnLine says if the first space occurred before line_width, if
     * that is not the case, we have a word longer than line_width which
     * will also not fit on the next line, so we might as well keep it on
     * the current line (where it also won't fit, but looks better)
     */

    b2    = nullptr;
    /* Room for the text, its terminator and one indentation; the buffer is
     * grown by indent each time another indentation is inserted below. */
    b2len = strlen(buf)+1+indent;
    snew(b2, b2len);
    i0 = i2 = 0;
    if (bIndentFirst)
    {
        for (i2 = 0; (i2 < indent); i2++)
        {
            b2[i2] = ' ';
        }
    }
    bFirst = TRUE;
    /* Each pass of this loop copies one output line's worth of buf */
    do
    {
        /* -1 means: no space seen yet in this pass */
        l2space = -1;
        /* find the last space before end of line */
        /* The scan continues past line_width for as long as no space has
         * been seen, so an overlong word is copied up to its end. */
        for (i = i0; ((i-i0 < line_width) || (l2space == -1)) && (buf[i]); i++)
        {
            b2[i2++] = buf[i];
            /* remember the position of a space */
            if (buf[i] == ' ')
            {
                lspace  = i;
                l2space = i2-1;
            }
            /* if we have a newline before the line is full, reset counters */
            if (buf[i] == '\n' && buf[i+1])
            {
                i0     = i+1;
                b2len += indent;
                srenew(b2, b2len);
                /* add indentation after the newline */
                for (j = 0; (j < indent); j++)
                {
                    b2[i2++] = ' ';
                }
            }
        }
        /* If we are at the last newline, copy it */
        if (buf[i] == '\n' && !buf[i+1])
        {
            b2[i2++] = buf[i++];
        }
        /* if we're not at the end of the string */
        if (buf[i])
        {
            /* check if one word does not fit on the line */
            bFitsOnLine = (i-i0 <= line_width);
            /* reset line counters to just after the space */
            /* Whatever was copied beyond that space is overwritten when
             * the next pass copies it again from i0. */
            i0 = lspace+1;
            i2 = l2space+1;
            /* if the words fit on the line, and we're beyond the indentation part */
            if ( (bFitsOnLine) && (l2space >= indent) )
            {
                /* start a new line */
                b2[l2space] = '\n';
                /* and add indentation */
                if (indent)
                {
                    if (bFirst)
                    {
                        /* Done only once, at the first wrapped line */
                        line_width -= indent;
                        bFirst      = FALSE;
                    }
                    b2len += indent;
                    srenew(b2, b2len);
                    for (j = 0; (j < indent); j++)
                    {
                        b2[i2++] = ' ';
                    }
                    /* no extra spaces after indent; */
                    while (buf[i0] == ' ')
                    {
                        i0++;
                    }
                }
            }
        }
    }
    while (buf[i]);
    b2[i2] = '\0';

    return b2;
}
 505
 506 int64_t
 507 str_to_int64_t(const char *str, char **endptr)
 508 {
 509 #ifndef _MSC_VER
 510     return strtoll(str, endptr, 10);
 511 #else
 512     return _strtoi64(str, endptr, 10);
 513 #endif
 514 }
 515
 516 char *gmx_step_str(int64_t i, char *buf)
 517 {
 518     sprintf(buf, "%" PRId64, i);
 519     return buf;
 520 }