src/gmxlib/string2.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /* This file is completely threadsafe - keep it that way! */
  39 #ifdef HAVE_CONFIG_H
  40 #include <config.h>
  41 #endif
  42 #include "visibility.h"
  43
  44 #ifdef GMX_CRAY_XT3
  45 #undef HAVE_PWD_H
  46 #endif
  47
  48 #include <stdio.h>
  49 #include <ctype.h>
  50 #include <stdlib.h>
  51 #include <errno.h>
  52 #include <sys/types.h>
  53 #include <time.h>
  54
  55 #ifdef HAVE_SYS_TIME_H
  56 #include <sys/time.h>
  57 #endif
  58
  59
  60 #ifdef HAVE_PWD_H
  61 #include <pwd.h>
  62 #endif
  63 #include <time.h>
  64 #include <assert.h>
  65
  66 #include "typedefs.h"
  67 #include "smalloc.h"
  68 #include "gmx_fatal.h"
  69 #include "macros.h"
  70 #include "string2.h"
  71 #include "futil.h"
  72
  73 int continuing(char *s)
  74 /* strip trailing spaces and if s ends with a CONTINUE remove that too.
  75  * returns TRUE if s ends with a CONTINUE, FALSE otherwise.
  76  */
  77 {
  78   int sl;
  79   assert(s);
  80
  81   rtrim(s);
  82   sl = strlen(s);
  83   if ((sl > 0) && (s[sl-1] == CONTINUE)) {
  84     s[sl-1] = 0;
  85     return TRUE;
  86   }
  87   else
  88     return FALSE;
  89 }
  90
  91
  92
  93 char *fgets2(char *line, int n, FILE *stream)
  94 /* This routine reads a string from stream of max length n
  95  * and zero terminated, without newlines
  96  * line should be long enough (>= n)
  97  */
  98 {
  99   char *c;
 100   if (fgets(line,n,stream) == NULL) {
 101     return NULL;
 102   }
 103   if ((c=strchr(line,'\n')) != NULL) {
 104     *c = '\0';
 105   } else {
 106     /* A line not ending in a newline can only occur at the end of a file,
 107      * or because of n being too small.
 108      * Since both cases occur very infrequently, we can check for EOF.
 109      */
 110     if (!gmx_eof(stream)) {
 111       gmx_fatal(FARGS,"An input file contains a line longer than %d characters, while the buffer passed to fgets2 has size %d. The line starts with: '%20.20s'",n,n,line);
 112     }
 113   }
 114   if ((c=strchr(line,'\r')) != NULL) {
 115     *c = '\0';
 116   }
 117
 118   return line;
 119 }
 120
 121 void strip_comment (char *line)
 122 {
 123   char *c;
 124
 125   if (!line)
 126     return;
 127
 128   /* search for a comment mark and replace it by a zero */
 129   if ((c = strchr(line,COMMENTSIGN)) != NULL)
 130     (*c) = 0;
 131 }
 132
 133 void upstring (char *str)
 134 {
 135   int i;
 136
 137   for (i=0; (i < (int)strlen(str)); i++)
 138     str[i] = toupper(str[i]);
 139 }
 140
 141 void ltrim (char *str)
 142 {
 143   char *tr;
 144   int i,c;
 145
 146   if (NULL == str)
 147     return;
 148
 149   c = 0;
 150   while (('\0' != str[c]) && isspace(str[c]))
 151     c++;
 152   if (c > 0)
 153     {
 154       for(i=c; ('\0' != str[i]); i++)
 155         str[i-c] = str[i];
 156       str[i-c] = '\0';
 157     }
 158 }
 159
 160 void rtrim (char *str)
 161 {
 162   int nul;
 163
 164   if (NULL == str)
 165     return;
 166
 167   nul = strlen(str)-1;
 168   while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')) ) {
 169     str[nul] = '\0';
 170     nul--;
 171   }
 172 }
 173
 174 void trim (char *str)
 175 {
 176   ltrim (str);
 177   rtrim (str);
 178 }
 179
 180 GMX_LIBGMX_EXPORT
 181 char *
 182 gmx_ctime_r(const time_t *clock,char *buf, int n)
 183 {
 184     char tmpbuf[STRLEN];
 185
 186 #ifdef GMX_NATIVE_WINDOWS
 187     /* Windows */
 188     ctime_s( tmpbuf, STRLEN, clock );
 189 #elif (defined(__sun))
 190     /*Solaris*/
 191     ctime_r(clock, tmpbuf, n);
 192 #else
 193     ctime_r(clock,tmpbuf);
 194 #endif
 195     strncpy(buf,tmpbuf,n-1);
 196     buf[n-1]='\0';
 197
 198     return buf;
 199 }
 200
 201 void nice_header (FILE *out,const char *fn)
 202 {
 203   const char *unk = "onbekend";
 204   time_t clock;
 205   const char *user=unk;
 206   int    gh;
 207   uid_t  uid;
 208   char   buf[256]="";
 209   char   timebuf[STRLEN];
 210 #ifdef HAVE_PWD_H
 211   struct passwd *pw;
 212 #endif
 213
 214   /* Print a nice header above the file */
 215   time(&clock);
 216   fprintf (out,"%c\n",COMMENTSIGN);
 217   fprintf (out,"%c\tFile '%s' was generated\n",COMMENTSIGN,fn ? fn : unk);
 218
 219 #ifdef HAVE_PWD_H
 220   uid = getuid();
 221   pw  = getpwuid(uid);
 222   gh  = gethostname(buf,255);
 223   user= pw->pw_name;
 224 #else
 225   uid = 0;
 226   gh  = -1;
 227 #endif
 228
 229   gmx_ctime_r(&clock,timebuf,STRLEN);
 230   fprintf (out,"%c\tBy user: %s (%d)\n",COMMENTSIGN,
 231            user ? user : unk,(int) uid);
 232   fprintf(out,"%c\tOn host: %s\n",COMMENTSIGN,(gh == 0) ? buf : unk);
 233
 234   fprintf (out,"%c\tAt date: %s",COMMENTSIGN,timebuf);
 235   fprintf (out,"%c\n",COMMENTSIGN);
 236 }
 237
 238
 239 int gmx_strcasecmp_min(const char *str1, const char *str2)
 240 {
 241     char ch1,ch2;
 242
 243     do
 244     {
 245         do
 246         {
 247             ch1=toupper(*(str1++));
 248         }
 249         while ((ch1=='-') || (ch1=='_'));
 250         do
 251         {
 252             ch2=toupper(*(str2++));
 253         }
 254         while ((ch2=='-') || (ch2=='_'));
 255
 256         if (ch1!=ch2) return (ch1-ch2);
 257     }
 258     while (ch1);
 259     return 0;
 260 }
 261
 262 int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
 263 {
 264     char ch1,ch2;
 265     char *stri1, *stri2;
 266
 267     stri1=(char *)str1;
 268     stri2=(char *)str2;
 269     do
 270     {
 271         do
 272         {
 273             ch1=toupper(*(str1++));
 274         }
 275         while ((ch1=='-') || (ch1=='_'));
 276         do
 277         {
 278             ch2=toupper(*(str2++));
 279         }
 280         while ((ch2=='-') || (ch2=='_'));
 281
 282         if (ch1!=ch2) return (ch1-ch2);
 283     }
 284     while (ch1 && (str1-stri1<n) && (str2-stri2<n));
 285   return 0;
 286 }
 287
 288 int gmx_strcasecmp(const char *str1, const char *str2)
 289 {
 290   char ch1,ch2;
 291
 292   do
 293     {
 294       ch1=toupper(*(str1++));
 295       ch2=toupper(*(str2++));
 296       if (ch1!=ch2) return (ch1-ch2);
 297     }
 298   while (ch1);
 299   return 0;
 300 }
 301
 302 int gmx_strncasecmp(const char *str1, const char *str2, int n)
 303 {
 304   char ch1,ch2;
 305
 306   if(n==0)
 307     return 0;
 308
 309   do
 310     {
 311       ch1=toupper(*(str1++));
 312       ch2=toupper(*(str2++));
 313       if (ch1!=ch2) return (ch1-ch2);
 314       n--;
 315     }
 316   while (ch1 && n);
 317   return 0;
 318 }
 319
 320 char *gmx_strdup(const char *src)
 321 {
 322   char *dest;
 323
 324   snew(dest,strlen(src)+1);
 325   strcpy(dest,src);
 326
 327   return dest;
 328 }
 329
 330 char *
 331 gmx_strndup(const char *src, int n)
 332 {
 333     int   len;
 334     char *dest;
 335
 336     len = strlen(src);
 337     if (len > n)
 338     {
 339         len = n;
 340     }
 341     snew(dest, len+1);
 342     strncpy(dest, src, len);
 343     dest[len] = 0;
 344     return dest;
 345 }
 346
 347 /* Magic hash init number for Dan J. Bernsteins algorithm.
 348  * Do NOT use any other value unless you really know what you are doing.
 349  */
 350 const unsigned int
 351 gmx_string_hash_init = 5381;
 352
 353
 354 unsigned int
 355 gmx_string_hash_func(const char *s, unsigned int hash_init)
 356 {
 357     int c;
 358
 359     while ((c = toupper(*s++)) != '\0')
 360     {
 361         if(isalnum(c)) hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 362     }
 363     return hash_init;
 364 }
 365
 366 /*!
 367  * \param[in] pattern  Pattern to match against.
 368  * \param[in] str      String to match.
 369  * \returns   0 on match, GMX_NO_WCMATCH if there is no match.
 370  *
 371  * Matches \p str against \p pattern, which may contain * and ? wildcards.
 372  * All other characters are matched literally.
 373  * Currently, it is not possible to match literal * or ?.
 374  */
 375 int
 376 gmx_wcmatch(const char *pattern, const char *str)
 377 {
 378     while (*pattern)
 379     {
 380         if (*pattern == '*')
 381         {
 382             /* Skip multiple wildcards in a sequence */
 383             while (*pattern == '*' || *pattern == '?')
 384             {
 385                 ++pattern;
 386                 /* For ?, we need to check that there are characters left
 387                  * in str. */
 388                 if (*pattern == '?')
 389                 {
 390                     if (*str == 0)
 391                     {
 392                         return GMX_NO_WCMATCH;
 393                     }
 394                     else
 395                     {
 396                         ++str;
 397                     }
 398                 }
 399             }
 400             /* If the pattern ends after the star, we have a match */
 401             if (*pattern == 0)
 402             {
 403                 return 0;
 404             }
 405             /* Match the rest against each possible suffix of str */
 406             while (*str)
 407             {
 408                 /* Only do the recursive call if the first character
 409                  * matches. We don't have to worry about wildcards here,
 410                  * since we have processed them above. */
 411                 if (*pattern == *str)
 412                 {
 413                     int rc;
 414                     /* Match the suffix, and return if a match or an error */
 415                     rc = gmx_wcmatch(pattern, str);
 416                     if (rc != GMX_NO_WCMATCH)
 417                     {
 418                         return rc;
 419                     }
 420                 }
 421                 ++str;
 422             }
 423             /* If no suffix of str matches, we don't have a match */
 424             return GMX_NO_WCMATCH;
 425         }
 426         else if ((*pattern == '?' && *str != 0) || *pattern == *str)
 427         {
 428             ++str;
 429         }
 430         else
 431         {
 432             return GMX_NO_WCMATCH;
 433         }
 434         ++pattern;
 435     }
 436     /* When the pattern runs out, we have a match if the string has ended. */
 437     return (*str == 0) ? 0 : GMX_NO_WCMATCH;
 438 }
 439
/* Returns a newly allocated copy of buf in which spaces have been replaced
 * by newlines, so that lines are broken at the last space found while
 * scanning line_width characters from the start of the line.
 * indent spaces are inserted after every newline created here and after
 * every newline already present in buf (unless it is the last character);
 * the first line is only indented when bIndentFirst is set.
 * The result is allocated with snew/srenew and is not freed here
 * (presumably the caller has to release it - verify against the callers).
 */
char *wrap_lines(const char *buf,int line_width, int indent,gmx_bool bIndentFirst)
{
  char *b2;
  int i,i0,i2,j,b2len,lspace=0,l2space=0;
  gmx_bool bFirst,bFitsOnLine;

  /* characters are copied from buf to b2 with possible spaces changed
   * into newlines and extra space added for indentation.
   * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
   * i0 points to the beginning of the current line (in buf, source)
   * lspace and l2space point to the last space on the current line
   * (lspace in buf, l2space in b2)
   * bFirst is TRUE until the first line break is inserted; at that point
   * line_width is reduced by indent, once
   * bFitsOnLine says if the first space occurred before line_width, if
   * that is not the case, we have a word longer than line_width which
   * will also not fit on the next line, so we might as well keep it on
   * the current line (where it also won't fit, but looks better)
   */

  b2=NULL;
  /* room for the text, the terminator and the indentation of the first line */
  b2len=strlen(buf)+1+indent;
  snew(b2,b2len);
  i0=i2=0;
  if (bIndentFirst)
    for(i2=0; (i2<indent); i2++)
      b2[i2] = ' ';
  bFirst=TRUE;
  do {
    /* -1 means: no space seen yet in this pass */
    l2space = -1;
    /* find the last space before end of line; the scan continues beyond
     * line_width for as long as no space has been found at all */
    for(i=i0; ((i-i0 < line_width) || (l2space==-1)) && (buf[i]); i++) {
      b2[i2++] = buf[i];
      /* remember the position of a space */
      if (buf[i] == ' ') {
        lspace = i;
        l2space = i2-1;
      }
      /* if we have a newline before the line is full, reset counters */
      if (buf[i]=='\n' && buf[i+1]) {
        i0=i+1;
        /* grow the destination by the indentation that is added below */
        b2len+=indent;
        srenew(b2, b2len);
        /* add indentation after the newline */
        for(j=0; (j<indent); j++)
          b2[i2++]=' ';
      }
    }
    /* If we are at the last newline, copy it */
    if (buf[i]=='\n' && !buf[i+1]) {
      b2[i2++] = buf[i++];
    }
    /* if we're not at the end of the string */
    if (buf[i]) {
      /* check if one word does not fit on the line */
      bFitsOnLine = (i-i0 <= line_width);
      /* reset line counters to just after the space; the characters that
       * were copied beyond that space are overwritten in the next pass */
      i0 = lspace+1;
      i2 = l2space+1;
      /* if the words fit on the line, and we're beyond the indentation part */
      if ( (bFitsOnLine) && (l2space >= indent) ) {
        /* start a new line */
        b2[l2space] = '\n';
        /* and add indentation */
        if (indent) {
          if (bFirst) {
            /* done only once, at the first line break */
            line_width-=indent;
            bFirst=FALSE;
          }
          b2len+=indent;
          srenew(b2, b2len);
          for(j=0; (j<indent); j++)
            b2[i2++]=' ';
          /* no extra spaces after indent; */
          while(buf[i0]==' ')
            i0++;
        }
      }
    }
  } while (buf[i]);
  b2[i2] = '\0';

  return b2;
}
 522
 523 char **split(char sep,char *str)
 524 {
 525   char **ptr = NULL;
 526   int  n,nn,nptr = 0;
 527
 528   if (str == NULL)
 529     return NULL;
 530   nn = strlen(str);
 531   for(n=0; (n<nn); n++)
 532     if (str[n] == sep)
 533       nptr++;
 534   snew(ptr,nptr+2);
 535   nptr = 0;
 536   while (*str != '\0') {
 537     while ((*str != '\0') && (*str == sep))
 538       str++;
 539     if (*str != '\0') {
 540       snew(ptr[nptr],1+strlen(str));
 541       n = 0;
 542       while ((*str != '\0') && (*str != sep)) {
 543         ptr[nptr][n] = *str;
 544         str++;
 545         n++;
 546       }
 547       ptr[nptr][n] = '\0';
 548       nptr++;
 549     }
 550   }
 551   ptr[nptr] = NULL;
 552
 553   return ptr;
 554 }
 555
 556
 557 gmx_large_int_t
 558 str_to_large_int_t(const char *str, char **endptr)
 559 {
 560         int         sign = 1;
 561         gmx_large_int_t  val  = 0;
 562         char        ch;
 563         const char  *p;
 564
 565         p = str;
 566         if(p==NULL)
 567         {
 568                 *endptr=NULL;
 569                 return 0;
 570         }
 571
 572         /* Strip off initial white space */
 573         while(isspace(*p))
 574         {
 575                 p++;
 576         }
 577         /* Conform to ISO C99 - return original pointer if string does not contain a number */
 578         if(*str=='\0')
 579         {
 580                 *endptr=(char *)str;
 581         }
 582
 583         if(*p=='-')
 584         {
 585                 p++;
 586                 sign *= -1;
 587         }
 588
 589         while( ((ch=*p) != '\0') && isdigit(ch) )
 590         {
 591                 /* Important to add sign here, so we dont overflow in final multiplication */
 592                 ch = (ch-'0')*sign;
 593                 val = val*10 + ch;
 594                 if(ch != val%10)
 595                 {
 596                         /* Some sort of overflow has occured, set endptr to original string */
 597                         *endptr=(char *)str;
 598                         errno = ERANGE;
 599                         return(0);
 600                 }
 601                 p++;
 602         }
 603
 604         *endptr=(char *)p;
 605
 606         return val;
 607 }
 608
 609 char *gmx_strsep(char **stringp, const char *delim)
 610 {
 611     char *ret;
 612     int len=strlen(delim);
 613     int i,j=0;
 614     int found=0;
 615
 616     if (! *stringp)
 617         return NULL;
 618     ret=*stringp;
 619     do
 620     {
 621         if ( (*stringp)[j] == '\0')
 622         {
 623             found=1;
 624             *stringp=NULL;
 625             break;
 626         }
 627         for (i=0;i<len;i++)
 628         {
 629             if ( (*stringp)[j]==delim[i])
 630             {
 631                 (*stringp)[j]='\0';
 632                 *stringp=*stringp+j+1;
 633                 found=1;
 634                 break;
 635             }
 636         }
 637         j++;
 638     } while (!found);
 639
 640     return ret;
 641 }
 642