src/gromacs/utility/cstringutil.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /* This file is completely threadsafe - keep it that way! */
  39 #include "gmxpre.h"
  40
  41 #include "cstringutil.h"
  42
  43 #include <cassert>
  44 #include <cctype>
  45 #include <cstdio>
  46 #include <cstdlib>
  47 #include <cstring>
  48
  49 #include <string>
  50 #include <vector>
  51
  52 #include "gromacs/utility/basedefinitions.h"
  53 #include "gromacs/utility/fatalerror.h"
  54 #include "gromacs/utility/futil.h"
  55 #include "gromacs/utility/smalloc.h"
  56
//! Character that starts a comment; strip_comment() cuts a line at its first occurrence.
#define COMMENTSIGN ';'
  59
  60 int continuing(char* s)
  61 {
  62     int sl;
  63     assert(s);
  64
  65     rtrim(s);
  66     sl = strlen(s);
  67     if ((sl > 0) && (s[sl - 1] == CONTINUE))
  68     {
  69         s[sl - 1] = 0;
  70         return TRUE;
  71     }
  72     else
  73     {
  74         return FALSE;
  75     }
  76 }
  77
  78
  79 char* fgets2(char* line, int n, FILE* stream)
  80 {
  81     char* c;
  82     if (fgets(line, n, stream) == nullptr)
  83     {
  84         return nullptr;
  85     }
  86     if ((c = strchr(line, '\n')) != nullptr)
  87     {
  88         *c = '\0';
  89     }
  90     else
  91     {
  92         /* A line not ending in a newline can only occur at the end of a file,
  93          * or because of n being too small.
  94          * Since both cases occur very infrequently, we can check for EOF.
  95          */
  96         if (!feof(stream))
  97         {
  98             gmx_fatal(FARGS,
  99                       "An input file contains a line longer than %d characters, while the buffer "
 100                       "passed to fgets2 has size %d. The line starts with: '%20.20s'",
 101                       n, n, line);
 102         }
 103     }
 104     if ((c = strchr(line, '\r')) != nullptr)
 105     {
 106         *c = '\0';
 107     }
 108
 109     return line;
 110 }
 111
 112 void strip_comment(char* line)
 113 {
 114     char* c;
 115
 116     if (!line)
 117     {
 118         return;
 119     }
 120
 121     /* search for a comment mark and replace it by a zero */
 122     if ((c = strchr(line, COMMENTSIGN)) != nullptr)
 123     {
 124         (*c) = 0;
 125     }
 126 }
 127
 128 void upstring(char* str)
 129 {
 130     int i;
 131
 132     for (i = 0; (i < static_cast<int>(strlen(str))); i++)
 133     {
 134         str[i] = toupper(str[i]);
 135     }
 136 }
 137
 138 void ltrim(char* str)
 139 {
 140     int i, c;
 141
 142     if (nullptr == str)
 143     {
 144         return;
 145     }
 146
 147     c = 0;
 148     while (('\0' != str[c]) && isspace(str[c]))
 149     {
 150         c++;
 151     }
 152     if (c > 0)
 153     {
 154         for (i = c; ('\0' != str[i]); i++)
 155         {
 156             str[i - c] = str[i];
 157         }
 158         str[i - c] = '\0';
 159     }
 160 }
 161
 162 void rtrim(char* str)
 163 {
 164     int nul;
 165
 166     if (nullptr == str)
 167     {
 168         return;
 169     }
 170
 171     nul = strlen(str) - 1;
 172     while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')))
 173     {
 174         str[nul] = '\0';
 175         nul--;
 176     }
 177 }
 178
/* Trims str in place at both ends by calling ltrim() followed by rtrim().
 * A null pointer is accepted because both helpers ignore it.
 */
void trim(char* str)
{
    ltrim(str);
    rtrim(str);
}
 184
 185 int gmx_strcasecmp_min(const char* str1, const char* str2)
 186 {
 187     char ch1, ch2;
 188
 189     do
 190     {
 191         do
 192         {
 193             ch1 = toupper(*(str1++));
 194         } while ((ch1 == '-') || (ch1 == '_'));
 195         do
 196         {
 197             ch2 = toupper(*(str2++));
 198         } while ((ch2 == '-') || (ch2 == '_'));
 199
 200         if (ch1 != ch2)
 201         {
 202             return (ch1 - ch2);
 203         }
 204     } while (ch1 != 0);
 205     return 0;
 206 }
 207
 208 int gmx_strncasecmp_min(const char* str1, const char* str2, int n)
 209 {
 210     char  ch1, ch2;
 211     char *stri1, *stri2;
 212
 213     stri1 = const_cast<char*>(str1);
 214     stri2 = const_cast<char*>(str2);
 215     do
 216     {
 217         do
 218         {
 219             ch1 = toupper(*(str1++));
 220         } while ((ch1 == '-') || (ch1 == '_'));
 221         do
 222         {
 223             ch2 = toupper(*(str2++));
 224         } while ((ch2 == '-') || (ch2 == '_'));
 225
 226         if (ch1 != ch2)
 227         {
 228             return (ch1 - ch2);
 229         }
 230     } while ((ch1 != 0) && (str1 - stri1 < n) && (str2 - stri2 < n));
 231     return 0;
 232 }
 233
 234 int gmx_strcasecmp(const char* str1, const char* str2)
 235 {
 236     char ch1, ch2;
 237
 238     do
 239     {
 240         ch1 = toupper(*(str1++));
 241         ch2 = toupper(*(str2++));
 242         if (ch1 != ch2)
 243         {
 244             return (ch1 - ch2);
 245         }
 246     } while (ch1 != 0);
 247     return 0;
 248 }
 249
 250 int gmx_strncasecmp(const char* str1, const char* str2, int n)
 251 {
 252     char ch1, ch2;
 253
 254     if (n == 0)
 255     {
 256         return 0;
 257     }
 258
 259     do
 260     {
 261         ch1 = toupper(*(str1++));
 262         ch2 = toupper(*(str2++));
 263         if (ch1 != ch2)
 264         {
 265             return (ch1 - ch2);
 266         }
 267         n--;
 268     } while ((ch1 != 0) && (n != 0));
 269     return 0;
 270 }
 271
 272 char* gmx_strdup(const char* src)
 273 {
 274     char* dest;
 275
 276     auto length = strlen(src) + 1;
 277     snew(dest, length);
 278     std::strncpy(dest, src, length);
 279
 280     return dest;
 281 }
 282
 283 char* gmx_strndup(const char* src, int n)
 284 {
 285     int   len;
 286     char* dest;
 287
 288     len = strlen(src);
 289     if (len > n)
 290     {
 291         len = n;
 292     }
 293     snew(dest, len + 1);
 294     strncpy(dest, src, len);
 295     dest[len] = 0;
 296     return dest;
 297 }
 298
/* Magic hash initialization number for Dan J. Bernstein's algorithm.
 * Do NOT use any other value unless you really know what you are doing.
 */
const unsigned int gmx_string_hash_init = 5381;
 303
 304
 305 unsigned int gmx_string_fullhash_func(const char* s, unsigned int hash_init)
 306 {
 307     int c;
 308
 309     while ((c = (*s++)) != '\0')
 310     {
 311         hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 312     }
 313     return hash_init;
 314 }
 315
 316 unsigned int gmx_string_hash_func(const char* s, unsigned int hash_init)
 317 {
 318     int c;
 319
 320     while ((c = toupper(*s++)) != '\0')
 321     {
 322         if (isalnum(c))
 323         {
 324             hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 325         }
 326     }
 327     return hash_init;
 328 }
 329
 330 int gmx_wcmatch(const char* pattern, const char* str)
 331 {
 332     while (*pattern)
 333     {
 334         if (*pattern == '*')
 335         {
 336             /* Skip multiple wildcards in a sequence */
 337             while (*pattern == '*' || *pattern == '?')
 338             {
 339                 ++pattern;
 340                 /* For ?, we need to check that there are characters left
 341                  * in str. */
 342                 if (*pattern == '?')
 343                 {
 344                     if (*str == 0)
 345                     {
 346                         return GMX_NO_WCMATCH;
 347                     }
 348                     else
 349                     {
 350                         ++str;
 351                     }
 352                 }
 353             }
 354             /* If the pattern ends after the star, we have a match */
 355             if (*pattern == 0)
 356             {
 357                 return 0;
 358             }
 359             /* Match the rest against each possible suffix of str */
 360             while (*str)
 361             {
 362                 /* Only do the recursive call if the first character
 363                  * matches. We don't have to worry about wildcards here,
 364                  * since we have processed them above. */
 365                 if (*pattern == *str)
 366                 {
 367                     int rc;
 368                     /* Match the suffix, and return if a match or an error */
 369                     rc = gmx_wcmatch(pattern, str);
 370                     if (rc != GMX_NO_WCMATCH)
 371                     {
 372                         return rc;
 373                     }
 374                 }
 375                 ++str;
 376             }
 377             /* If no suffix of str matches, we don't have a match */
 378             return GMX_NO_WCMATCH;
 379         }
 380         else if ((*pattern == '?' && *str != 0) || *pattern == *str)
 381         {
 382             ++str;
 383         }
 384         else
 385         {
 386             return GMX_NO_WCMATCH;
 387         }
 388         ++pattern;
 389     }
 390     /* When the pattern runs out, we have a match if the string has ended. */
 391     return (*str == 0) ? 0 : GMX_NO_WCMATCH;
 392 }
 393
/* Builds a line-wrapped, indented copy of buf.
 *
 * buf          - terminated text to wrap
 * line_width   - maximum number of characters of buf placed on one line
 * indent       - number of spaces inserted after every newline
 * bIndentFirst - when set, indent spaces are also written before the first line
 *
 * Returns a new terminated buffer allocated with snew()/srenew(). This
 * function never frees it, so ownership passes to the caller.
 * NOTE(review): presumably released with sfree() - confirm against callers.
 */
char* wrap_lines(const char* buf, int line_width, int indent, gmx_bool bIndentFirst)
{
    char*    b2;
    int      i, i0, i2, j, b2len, lspace = 0, l2space = 0;
    gmx_bool bFirst, bFitsOnLine;

    /* characters are copied from buf to b2 with possible spaces changed
     * into newlines and extra space added for indentation.
     * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
     * i0 points to the beginning of the current line (in buf, source)
     * lspace and l2space point to the last space on the current line
     * bFirst is set to prevent indentation of first line
     * bFitsOnLine says if the first space occurred before line_width, if
     * that is not the case, we have a word longer than line_width which
     * will also not fit on the next line, so we might as well keep it on
     * the current line (where it also won't fit, but looks better)
     */

    /* Initial capacity: the text, its terminator and one indentation.
     * The buffer grows by indent each time another indentation is inserted.
     */
    b2    = nullptr;
    b2len = strlen(buf) + 1 + indent;
    snew(b2, b2len);
    i0 = i2 = 0;
    if (bIndentFirst)
    {
        for (i2 = 0; (i2 < indent); i2++)
        {
            b2[i2] = ' ';
        }
    }
    bFirst = TRUE;
    do
    {
        /* -1 marks "no space seen yet on this line" */
        l2space = -1;
        /* find the last space before end of line */
        for (i = i0; ((i - i0 < line_width) || (l2space == -1)) && (buf[i]); i++)
        {
            b2[i2++] = buf[i];
            /* remember the position of a space */
            if (buf[i] == ' ')
            {
                lspace  = i;
                l2space = i2 - 1;
            }
            /* if we have a newline before the line is full, reset counters */
            if (buf[i] == '\n' && buf[i + 1])
            {
                i0 = i + 1;
                b2len += indent;
                srenew(b2, b2len);
                /* add indentation after the newline */
                for (j = 0; (j < indent); j++)
                {
                    b2[i2++] = ' ';
                }
            }
        }
        /* If we are at the last newline, copy it */
        if (buf[i] == '\n' && !buf[i + 1])
        {
            b2[i2++] = buf[i++];
        }
        /* if we're not at the end of the string */
        if (buf[i])
        {
            /* check if one word does not fit on the line */
            bFitsOnLine = (i - i0 <= line_width);
            /* reset line counters to just after the space; this rewinds both
             * indices, so the text copied past that space is copied again on
             * the next pass of the outer loop */
            i0 = lspace + 1;
            i2 = l2space + 1;
            /* if the words fit on the line, and we're beyond the indentation part */
            if ((bFitsOnLine) && (l2space >= indent))
            {
                /* start a new line */
                b2[l2space] = '\n';
                /* and add indentation */
                if (indent)
                {
                    if (bFirst)
                    {
                        /* line_width is reduced by indent exactly once, at the
                         * first wrap - presumably so that indented lines keep
                         * the same total width; verify */
                        line_width -= indent;
                        bFirst = FALSE;
                    }
                    b2len += indent;
                    srenew(b2, b2len);
                    for (j = 0; (j < indent); j++)
                    {
                        b2[i2++] = ' ';
                    }
                    /* no extra spaces after indent; */
                    while (buf[i0] == ' ')
                    {
                        i0++;
                    }
                }
            }
        }
    } while (buf[i] != 0);
    b2[i2] = '\0';

    return b2;
}
 495
 496 int64_t str_to_int64_t(const char* str, char** endptr)
 497 {
 498 #ifndef _MSC_VER
 499     return strtoll(str, endptr, 10);
 500 #else
 501     return _strtoi64(str, endptr, 10);
 502 #endif
 503 }
 504
/* Writes the decimal representation of i into buf and returns buf.
 *
 * The write is unbounded (sprintf), so buf must be able to hold the longest
 * 64-bit value: 20 characters plus the terminator.
 * NOTE(review): callers presumably size buf with a constant from the
 * header - confirm.
 */
char* gmx_step_str(int64_t i, char* buf)
{
    sprintf(buf, "%" PRId64, i);
    return buf;
}