src/gromacs/utility/cstringutil.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /* This file is completely threadsafe - keep it that way! */
  39 #include "gmxpre.h"
  40
  41 #include "cstringutil.h"
  42
  43 #include <cassert>
  44 #include <cctype>
  45 #include <cstdio>
  46 #include <cstdlib>
  47 #include <cstring>
  48
  49 #include <string>
  50 #include <vector>
  51
  52 #include "gromacs/utility/basedefinitions.h"
  53 #include "gromacs/utility/fatalerror.h"
  54 #include "gromacs/utility/futil.h"
  55 #include "gromacs/utility/smalloc.h"
  56
//! Character that starts a comment; strip_comment() cuts a line at its first occurrence.
#define COMMENTSIGN ';'
  59
  60 int continuing(char* s)
  61 {
  62     assert(s);
  63
  64     rtrim(s);
  65     int sl = strlen(s);
  66     if ((sl > 0) && (s[sl - 1] == CONTINUE))
  67     {
  68         s[sl - 1] = 0;
  69         return TRUE;
  70     }
  71     else
  72     {
  73         return FALSE;
  74     }
  75 }
  76
  77
  78 char* fgets2(char* line, int n, FILE* stream)
  79 {
  80     char* c = nullptr;
  81     if (fgets(line, n, stream) == nullptr)
  82     {
  83         return nullptr;
  84     }
  85     if ((c = strchr(line, '\n')) != nullptr)
  86     {
  87         *c = '\0';
  88     }
  89     else
  90     {
  91         /* A line not ending in a newline can only occur at the end of a file,
  92          * or because of n being too small.
  93          * Since both cases occur very infrequently, we can check for EOF.
  94          */
  95         if (!feof(stream))
  96         {
  97             gmx_fatal(FARGS,
  98                       "An input file contains a line longer than %d characters, while the buffer "
  99                       "passed to fgets2 has size %d. The line starts with: '%20.20s'",
 100                       n,
 101                       n,
 102                       line);
 103         }
 104     }
 105     if ((c = strchr(line, '\r')) != nullptr)
 106     {
 107         *c = '\0';
 108     }
 109
 110     return line;
 111 }
 112
 113 void strip_comment(char* line)
 114 {
 115     char* c = nullptr;
 116
 117     if (!line)
 118     {
 119         return;
 120     }
 121
 122     /* search for a comment mark and replace it by a zero */
 123     if ((c = strchr(line, COMMENTSIGN)) != nullptr)
 124     {
 125         (*c) = 0;
 126     }
 127 }
 128
 129 void upstring(char* str)
 130 {
 131     for (int i = 0; (i < static_cast<int>(strlen(str))); i++)
 132     {
 133         str[i] = toupper(str[i]);
 134     }
 135 }
 136
 137 void ltrim(char* str)
 138 {
 139     if (nullptr == str)
 140     {
 141         return;
 142     }
 143
 144     int c = 0;
 145     while (('\0' != str[c]) && isspace(str[c]))
 146     {
 147         c++;
 148     }
 149     if (c > 0)
 150     {
 151         int i = c;
 152         for (; ('\0' != str[i]); i++)
 153         {
 154             str[i - c] = str[i];
 155         }
 156         str[i - c] = '\0';
 157     }
 158 }
 159
 160 void rtrim(char* str)
 161 {
 162     if (nullptr == str)
 163     {
 164         return;
 165     }
 166
 167     int nul = strlen(str) - 1;
 168     while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')))
 169     {
 170         str[nul] = '\0';
 171         nul--;
 172     }
 173 }
 174
 175 void trim(char* str)
 176 {
 177     ltrim(str);
 178     rtrim(str);
 179 }
 180
 181 int gmx_strcasecmp_min(const char* str1, const char* str2)
 182 {
 183     char ch1 = 0, ch2 = 0;
 184
 185     do
 186     {
 187         do
 188         {
 189             ch1 = toupper(*(str1++));
 190         } while ((ch1 == '-') || (ch1 == '_'));
 191         do
 192         {
 193             ch2 = toupper(*(str2++));
 194         } while ((ch2 == '-') || (ch2 == '_'));
 195
 196         if (ch1 != ch2)
 197         {
 198             return (ch1 - ch2);
 199         }
 200     } while (ch1 != 0);
 201     return 0;
 202 }
 203
 204 int gmx_strncasecmp_min(const char* str1, const char* str2, int n)
 205 {
 206     char ch1 = 0, ch2 = 0;
 207
 208     const char* stri1 = str1;
 209     const char* stri2 = str2;
 210     do
 211     {
 212         do
 213         {
 214             ch1 = toupper(*(str1++));
 215         } while ((ch1 == '-') || (ch1 == '_'));
 216         do
 217         {
 218             ch2 = toupper(*(str2++));
 219         } while ((ch2 == '-') || (ch2 == '_'));
 220
 221         if (ch1 != ch2)
 222         {
 223             return (ch1 - ch2);
 224         }
 225     } while ((ch1 != 0) && (str1 - stri1 < n) && (str2 - stri2 < n));
 226     return 0;
 227 }
 228
 229 int gmx_strcasecmp(const char* str1, const char* str2)
 230 {
 231     char ch1 = 0, ch2 = 0;
 232
 233     do
 234     {
 235         ch1 = toupper(*(str1++));
 236         ch2 = toupper(*(str2++));
 237         if (ch1 != ch2)
 238         {
 239             return (ch1 - ch2);
 240         }
 241     } while (ch1 != 0);
 242     return 0;
 243 }
 244
 245 int gmx_strncasecmp(const char* str1, const char* str2, int n)
 246 {
 247     char ch1 = 0, ch2 = 0;
 248
 249     if (n == 0)
 250     {
 251         return 0;
 252     }
 253
 254     do
 255     {
 256         ch1 = toupper(*(str1++));
 257         ch2 = toupper(*(str2++));
 258         if (ch1 != ch2)
 259         {
 260             return (ch1 - ch2);
 261         }
 262         n--;
 263     } while ((ch1 != 0) && (n != 0));
 264     return 0;
 265 }
 266
 267 char* gmx_strdup(const char* src)
 268 {
 269     char* dest = nullptr;
 270
 271     auto length = strlen(src) + 1;
 272     snew(dest, length);
 273     std::strncpy(dest, src, length);
 274
 275     return dest;
 276 }
 277
 278 char* gmx_strndup(const char* src, int n)
 279 {
 280     char* dest = nullptr;
 281
 282     int len = strlen(src);
 283     if (len > n)
 284     {
 285         len = n;
 286     }
 287     snew(dest, len + 1);
 288     strncpy(dest, src, len);
 289     dest[len] = 0;
 290     return dest;
 291 }
 292
/* Magic initial hash value for Dan J. Bernstein's algorithm.
 * Do NOT use any other value unless you really know what you are doing.
 */
const unsigned int gmx_string_hash_init = 5381;
 297
 298
 299 unsigned int gmx_string_fullhash_func(const char* s, unsigned int hash_init)
 300 {
 301     int c = 0;
 302
 303     while ((c = (*s++)) != '\0')
 304     {
 305         hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 306     }
 307     return hash_init;
 308 }
 309
 310 unsigned int gmx_string_hash_func(const char* s, unsigned int hash_init)
 311 {
 312     int c = 0;
 313
 314     while ((c = toupper(*s++)) != '\0')
 315     {
 316         if (isalnum(c))
 317         {
 318             hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 319         }
 320     }
 321     return hash_init;
 322 }
 323
/* Matches str against a wildcard pattern.
 *
 * In pattern, '*' matches any run of characters (including none) and '?'
 * matches exactly one character; every other character has to match itself.
 *
 * Returns 0 if the whole of str matches the whole of pattern, and
 * GMX_NO_WCMATCH otherwise.
 */
int gmx_wcmatch(const char* pattern, const char* str)
{
    while (*pattern)
    {
        if (*pattern == '*')
        {
            /* Skip multiple wildcards in a sequence */
            while (*pattern == '*' || *pattern == '?')
            {
                ++pattern;
                /* For ?, we need to check that there are characters left
                 * in str. The test looks at the character just advanced to;
                 * since the run starts with '*', every '?' in it is seen
                 * here exactly once and consumes one character of str. */
                if (*pattern == '?')
                {
                    if (*str == 0)
                    {
                        return GMX_NO_WCMATCH;
                    }
                    else
                    {
                        ++str;
                    }
                }
            }
            /* If the pattern ends after the star, we have a match */
            if (*pattern == 0)
            {
                return 0;
            }
            /* Match the rest against each possible suffix of str */
            while (*str)
            {
                /* Only do the recursive call if the first character
                 * matches. We don't have to worry about wildcards here,
                 * since we have processed them above. */
                if (*pattern == *str)
                {
                    /* Match the suffix, and return if a match or an error */
                    int rc = gmx_wcmatch(pattern, str);
                    if (rc != GMX_NO_WCMATCH)
                    {
                        return rc;
                    }
                }
                ++str;
            }
            /* If no suffix of str matches, we don't have a match */
            return GMX_NO_WCMATCH;
        }
        /* A '?' needs one character of str; a literal has to be equal to it */
        else if ((*pattern == '?' && *str != 0) || *pattern == *str)
        {
            ++str;
        }
        else
        {
            return GMX_NO_WCMATCH;
        }
        ++pattern;
    }
    /* When the pattern runs out, we have a match if the string has ended. */
    return (*str == 0) ? 0 : GMX_NO_WCMATCH;
}
 386
/* Returns a copy of buf in which lines are wrapped at spaces and indented.
 *
 * buf           text to wrap; it is only read
 * line_width    width used when searching for the last space on a line; it
 *               is reduced by indent once, the first time a line break with
 *               indentation is inserted
 * indent        number of spaces inserted after every newline in buf that is
 *               followed by more text, and after every inserted line break
 * bIndentFirst  if true, the output also starts with indent spaces
 *
 * The result is allocated with snew() and enlarged with srenew() whenever
 * indentation is added.
 * NOTE(review): the caller presumably has to free the returned buffer - confirm.
 */
char* wrap_lines(const char* buf, int line_width, int indent, gmx_bool bIndentFirst)
{
    int i = 0;

    /* characters are copied from buf to b2 with possible spaces changed
     * into newlines and extra space added for indentation.
     * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
     * i0 points to the beginning of the current line (in buf, source)
     * lspace and l2space point to the last space on the current line
     * (in buf and in b2, respectively)
     * bFirst is true until the first line break with indentation is
     * inserted; at that point line_width is reduced by indent, once
     * bFitsOnLine says if the first space occurred before line_width, if
     * that is not the case, we have a word longer than line_width which
     * will also not fit on the next line, so we might as well keep it on
     * the current line (where it also won't fit, but looks better)
     */

    char* b2    = nullptr;
    /* room for the text, its terminator and one indentation */
    int   b2len = strlen(buf) + 1 + indent;
    snew(b2, b2len);
    int i0 = 0;
    int i2 = 0;
    if (bIndentFirst)
    {
        for (i2 = 0; (i2 < indent); i2++)
        {
            b2[i2] = ' ';
        }
    }
    bool bFirst = true;
    do
    {
        int lspace  = 0;
        int l2space = -1;
        /* find the last space before end of line; keep going past
         * line_width as long as no space has been seen at all */
        for (i = i0; ((i - i0 < line_width) || (l2space == -1)) && (buf[i]); i++)
        {
            b2[i2++] = buf[i];
            /* remember the position of a space */
            if (buf[i] == ' ')
            {
                lspace  = i;
                l2space = i2 - 1;
            }
            /* if we have a newline before the line is full, reset counters */
            if (buf[i] == '\n' && buf[i + 1])
            {
                i0 = i + 1;
                /* make room for the indentation about to be written */
                b2len += indent;
                srenew(b2, b2len);
                /* add indentation after the newline */
                for (int j = 0; (j < indent); j++)
                {
                    b2[i2++] = ' ';
                }
            }
        }
        /* If we are at the last newline, copy it */
        if (buf[i] == '\n' && !buf[i + 1])
        {
            b2[i2++] = buf[i++];
        }
        /* if we're not at the end of the string */
        if (buf[i])
        {
            /* check if one word does not fit on the line */
            bool bFitsOnLine = (i - i0 <= line_width);
            /* reset line counters to just after the space; whatever was
             * copied to b2 beyond that space will be written again */
            i0 = lspace + 1;
            i2 = l2space + 1;
            /* if the words fit on the line, and we're beyond the indentation part */
            if ((bFitsOnLine) && (l2space >= indent))
            {
                /* start a new line */
                b2[l2space] = '\n';
                /* and add indentation */
                if (indent)
                {
                    if (bFirst)
                    {
                        line_width -= indent;
                        bFirst = FALSE;
                    }
                    /* make room for the indentation about to be written */
                    b2len += indent;
                    srenew(b2, b2len);
                    for (int j = 0; (j < indent); j++)
                    {
                        b2[i2++] = ' ';
                    }
                    /* no extra spaces after indent; */
                    while (buf[i0] == ' ')
                    {
                        i0++;
                    }
                }
            }
        }
    } while (buf[i] != 0);
    b2[i2] = '\0';

    return b2;
}
 488
 489 int64_t str_to_int64_t(const char* str, char** endptr)
 490 {
 491 #ifndef _MSC_VER
 492     return strtoll(str, endptr, 10);
 493 #else
 494     return _strtoi64(str, endptr, 10);
 495 #endif
 496 }
 497
 498 char* gmx_step_str(int64_t i, char* buf)
 499 {
 500     sprintf(buf, "%" PRId64, i);
 501     return buf;
 502 }