src/gromacs/utility/cstringutil.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /* This file is completely threadsafe - keep it that way! */
  38 #include "gmxpre.h"
  39
  40 #include "cstringutil.h"
  41
  42 #include <cassert>
  43 #include <cctype>
  44 #include <cstdio>
  45 #include <cstdlib>
  46 #include <cstring>
  47
  48 #include <string>
  49 #include <vector>
  50
  51 #include "gromacs/utility/basedefinitions.h"
  52 #include "gromacs/utility/fatalerror.h"
  53 #include "gromacs/utility/futil.h"
  54 #include "gromacs/utility/smalloc.h"
  55
//! Character that starts a comment; strip_comment() cuts a line at its first occurrence.
#define COMMENTSIGN ';'
  58
  59 int continuing(char* s)
  60 {
  61     int sl;
  62     assert(s);
  63
  64     rtrim(s);
  65     sl = strlen(s);
  66     if ((sl > 0) && (s[sl - 1] == CONTINUE))
  67     {
  68         s[sl - 1] = 0;
  69         return TRUE;
  70     }
  71     else
  72     {
  73         return FALSE;
  74     }
  75 }
  76
  77
  78 char* fgets2(char* line, int n, FILE* stream)
  79 {
  80     char* c;
  81     if (fgets(line, n, stream) == nullptr)
  82     {
  83         return nullptr;
  84     }
  85     if ((c = strchr(line, '\n')) != nullptr)
  86     {
  87         *c = '\0';
  88     }
  89     else
  90     {
  91         /* A line not ending in a newline can only occur at the end of a file,
  92          * or because of n being too small.
  93          * Since both cases occur very infrequently, we can check for EOF.
  94          */
  95         if (!feof(stream))
  96         {
  97             gmx_fatal(FARGS,
  98                       "An input file contains a line longer than %d characters, while the buffer "
  99                       "passed to fgets2 has size %d. The line starts with: '%20.20s'",
 100                       n, n, line);
 101         }
 102     }
 103     if ((c = strchr(line, '\r')) != nullptr)
 104     {
 105         *c = '\0';
 106     }
 107
 108     return line;
 109 }
 110
 111 void strip_comment(char* line)
 112 {
 113     char* c;
 114
 115     if (!line)
 116     {
 117         return;
 118     }
 119
 120     /* search for a comment mark and replace it by a zero */
 121     if ((c = strchr(line, COMMENTSIGN)) != nullptr)
 122     {
 123         (*c) = 0;
 124     }
 125 }
 126
 127 void upstring(char* str)
 128 {
 129     int i;
 130
 131     for (i = 0; (i < static_cast<int>(strlen(str))); i++)
 132     {
 133         str[i] = toupper(str[i]);
 134     }
 135 }
 136
 137 void ltrim(char* str)
 138 {
 139     int i, c;
 140
 141     if (nullptr == str)
 142     {
 143         return;
 144     }
 145
 146     c = 0;
 147     while (('\0' != str[c]) && isspace(str[c]))
 148     {
 149         c++;
 150     }
 151     if (c > 0)
 152     {
 153         for (i = c; ('\0' != str[i]); i++)
 154         {
 155             str[i - c] = str[i];
 156         }
 157         str[i - c] = '\0';
 158     }
 159 }
 160
 161 void rtrim(char* str)
 162 {
 163     int nul;
 164
 165     if (nullptr == str)
 166     {
 167         return;
 168     }
 169
 170     nul = strlen(str) - 1;
 171     while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')))
 172     {
 173         str[nul] = '\0';
 174         nul--;
 175     }
 176 }
 177
 178 void trim(char* str)
 179 {
 180     ltrim(str);
 181     rtrim(str);
 182 }
 183
 184 int gmx_strcasecmp_min(const char* str1, const char* str2)
 185 {
 186     char ch1, ch2;
 187
 188     do
 189     {
 190         do
 191         {
 192             ch1 = toupper(*(str1++));
 193         } while ((ch1 == '-') || (ch1 == '_'));
 194         do
 195         {
 196             ch2 = toupper(*(str2++));
 197         } while ((ch2 == '-') || (ch2 == '_'));
 198
 199         if (ch1 != ch2)
 200         {
 201             return (ch1 - ch2);
 202         }
 203     } while (ch1 != 0);
 204     return 0;
 205 }
 206
 207 int gmx_strncasecmp_min(const char* str1, const char* str2, int n)
 208 {
 209     char  ch1, ch2;
 210     char *stri1, *stri2;
 211
 212     stri1 = const_cast<char*>(str1);
 213     stri2 = const_cast<char*>(str2);
 214     do
 215     {
 216         do
 217         {
 218             ch1 = toupper(*(str1++));
 219         } while ((ch1 == '-') || (ch1 == '_'));
 220         do
 221         {
 222             ch2 = toupper(*(str2++));
 223         } while ((ch2 == '-') || (ch2 == '_'));
 224
 225         if (ch1 != ch2)
 226         {
 227             return (ch1 - ch2);
 228         }
 229     } while ((ch1 != 0) && (str1 - stri1 < n) && (str2 - stri2 < n));
 230     return 0;
 231 }
 232
 233 int gmx_strcasecmp(const char* str1, const char* str2)
 234 {
 235     char ch1, ch2;
 236
 237     do
 238     {
 239         ch1 = toupper(*(str1++));
 240         ch2 = toupper(*(str2++));
 241         if (ch1 != ch2)
 242         {
 243             return (ch1 - ch2);
 244         }
 245     } while (ch1 != 0);
 246     return 0;
 247 }
 248
 249 int gmx_strncasecmp(const char* str1, const char* str2, int n)
 250 {
 251     char ch1, ch2;
 252
 253     if (n == 0)
 254     {
 255         return 0;
 256     }
 257
 258     do
 259     {
 260         ch1 = toupper(*(str1++));
 261         ch2 = toupper(*(str2++));
 262         if (ch1 != ch2)
 263         {
 264             return (ch1 - ch2);
 265         }
 266         n--;
 267     } while ((ch1 != 0) && (n != 0));
 268     return 0;
 269 }
 270
 271 char* gmx_strdup(const char* src)
 272 {
 273     char* dest;
 274
 275     auto length = strlen(src) + 1;
 276     snew(dest, length);
 277     std::strncpy(dest, src, length);
 278
 279     return dest;
 280 }
 281
 282 char* gmx_strndup(const char* src, int n)
 283 {
 284     int   len;
 285     char* dest;
 286
 287     len = strlen(src);
 288     if (len > n)
 289     {
 290         len = n;
 291     }
 292     snew(dest, len + 1);
 293     strncpy(dest, src, len);
 294     dest[len] = 0;
 295     return dest;
 296 }
 297
 298 /* Magic hash init number for Dan J. Bernsteins algorithm.
 299  * Do NOT use any other value unless you really know what you are doing.
 300  */
 301 const unsigned int gmx_string_hash_init = 5381;
 302
 303
 304 unsigned int gmx_string_fullhash_func(const char* s, unsigned int hash_init)
 305 {
 306     int c;
 307
 308     while ((c = (*s++)) != '\0')
 309     {
 310         hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 311     }
 312     return hash_init;
 313 }
 314
 315 unsigned int gmx_string_hash_func(const char* s, unsigned int hash_init)
 316 {
 317     int c;
 318
 319     while ((c = toupper(*s++)) != '\0')
 320     {
 321         if (isalnum(c))
 322         {
 323             hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 324         }
 325     }
 326     return hash_init;
 327 }
 328
 329 int gmx_wcmatch(const char* pattern, const char* str)
 330 {
 331     while (*pattern)
 332     {
 333         if (*pattern == '*')
 334         {
 335             /* Skip multiple wildcards in a sequence */
 336             while (*pattern == '*' || *pattern == '?')
 337             {
 338                 ++pattern;
 339                 /* For ?, we need to check that there are characters left
 340                  * in str. */
 341                 if (*pattern == '?')
 342                 {
 343                     if (*str == 0)
 344                     {
 345                         return GMX_NO_WCMATCH;
 346                     }
 347                     else
 348                     {
 349                         ++str;
 350                     }
 351                 }
 352             }
 353             /* If the pattern ends after the star, we have a match */
 354             if (*pattern == 0)
 355             {
 356                 return 0;
 357             }
 358             /* Match the rest against each possible suffix of str */
 359             while (*str)
 360             {
 361                 /* Only do the recursive call if the first character
 362                  * matches. We don't have to worry about wildcards here,
 363                  * since we have processed them above. */
 364                 if (*pattern == *str)
 365                 {
 366                     int rc;
 367                     /* Match the suffix, and return if a match or an error */
 368                     rc = gmx_wcmatch(pattern, str);
 369                     if (rc != GMX_NO_WCMATCH)
 370                     {
 371                         return rc;
 372                     }
 373                 }
 374                 ++str;
 375             }
 376             /* If no suffix of str matches, we don't have a match */
 377             return GMX_NO_WCMATCH;
 378         }
 379         else if ((*pattern == '?' && *str != 0) || *pattern == *str)
 380         {
 381             ++str;
 382         }
 383         else
 384         {
 385             return GMX_NO_WCMATCH;
 386         }
 387         ++pattern;
 388     }
 389     /* When the pattern runs out, we have a match if the string has ended. */
 390     return (*str == 0) ? 0 : GMX_NO_WCMATCH;
 391 }
 392
/* Copy buf into a newly allocated string, breaking lines at spaces and
 * indenting the lines that follow a break.
 *
 * buf          text to wrap
 * line_width   number of source characters scanned per line before a break
 *              is made; reduced by indent after the first inserted break
 *              when indent is non-zero
 * indent       number of spaces written after each newline
 * bIndentFirst when set, the output also starts with indent spaces
 *
 * Returns the buffer allocated with snew/srenew.
 * NOTE(review): the caller presumably owns and has to release it - confirm.
 */
char* wrap_lines(const char* buf, int line_width, int indent, gmx_bool bIndentFirst)
{
    char*    b2;
    int      i, i0, i2, j, b2len, lspace = 0, l2space = 0;
    gmx_bool bFirst, bFitsOnLine;

    /* characters are copied from buf to b2 with possible spaces changed
     * into newlines and extra space added for indentation.
     * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
     * i0 points to the beginning of the current line (in buf, source)
     * lspace and l2space point to the last space on the current line
     * (lspace in buf, l2space in b2)
     * bFirst is set to prevent indentation of first line
     * bFitsOnLine says if the first space occurred before line_width, if
     * that is not the case, we have a word longer than line_width which
     * will also not fit on the next line, so we might as well keep it on
     * the current line (where it also won't fit, but looks better)
     */

    b2    = nullptr;
    /* room for the text, its terminator and one indentation; the buffer is
     * grown by indent each time more indentation is added below */
    b2len = strlen(buf) + 1 + indent;
    snew(b2, b2len);
    i0 = i2 = 0;
    if (bIndentFirst)
    {
        for (i2 = 0; (i2 < indent); i2++)
        {
            b2[i2] = ' ';
        }
    }
    bFirst = TRUE;
    do
    {
        /* -1 means that no space has been seen yet during this pass */
        l2space = -1;
        /* find the last space before end of line; keep going past line_width
         * until at least one space has been seen */
        for (i = i0; ((i - i0 < line_width) || (l2space == -1)) && (buf[i]); i++)
        {
            b2[i2++] = buf[i];
            /* remember the position of a space */
            if (buf[i] == ' ')
            {
                lspace  = i;
                l2space = i2 - 1;
            }
            /* if we have a newline before the line is full, reset counters */
            if (buf[i] == '\n' && buf[i + 1])
            {
                i0 = i + 1;
                b2len += indent;
                srenew(b2, b2len);
                /* add indentation after the newline */
                for (j = 0; (j < indent); j++)
                {
                    b2[i2++] = ' ';
                }
            }
        }
        /* If we are at the last newline, copy it */
        if (buf[i] == '\n' && !buf[i + 1])
        {
            b2[i2++] = buf[i++];
        }
        /* if we're not at the end of the string */
        if (buf[i])
        {
            /* check if one word does not fit on the line */
            bFitsOnLine = (i - i0 <= line_width);
            /* reset line counters to just after the space; whatever was
             * copied beyond that space is copied again by the next pass */
            i0 = lspace + 1;
            i2 = l2space + 1;
            /* if the words fit on the line, and we're beyond the indentation part */
            if ((bFitsOnLine) && (l2space >= indent))
            {
                /* start a new line */
                b2[l2space] = '\n';
                /* and add indentation */
                if (indent)
                {
                    if (bFirst)
                    {
                        /* done once only, at the first break made here */
                        line_width -= indent;
                        bFirst = FALSE;
                    }
                    b2len += indent;
                    srenew(b2, b2len);
                    for (j = 0; (j < indent); j++)
                    {
                        b2[i2++] = ' ';
                    }
                    /* no extra spaces after indent; */
                    while (buf[i0] == ' ')
                    {
                        i0++;
                    }
                }
            }
        }
    } while (buf[i] != 0);
    b2[i2] = '\0';

    return b2;
}
 494
 495 int64_t str_to_int64_t(const char* str, char** endptr)
 496 {
 497 #ifndef _MSC_VER
 498     return strtoll(str, endptr, 10);
 499 #else
 500     return _strtoi64(str, endptr, 10);
 501 #endif
 502 }
 503
 504 char* gmx_step_str(int64_t i, char* buf)
 505 {
 506     sprintf(buf, "%" PRId64, i);
 507     return buf;
 508 }