2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2011-2018, The GROMACS development team.
5 * Copyright (c) 2019, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
38 * Declares common string utility and formatting routines.
40 * \author Teemu Murtola <teemu.murtola@gmail.com>
42 * \ingroup module_utility
44 #ifndef GMX_UTILITY_STRINGUTIL_H
45 #define GMX_UTILITY_STRINGUTIL_H
56 //! \addtogroup module_utility
/*! \brief
 * Tests whether a string is null or empty.
 *
 * \param[in] str  C string to test; may be \c nullptr.
 * \returns   true if \p str is \c nullptr or its first character is the
 *     terminating zero.
 */
static inline bool isNullOrEmpty(const char* str)
{
    // The null check must come first so that str[0] is never read through
    // a null pointer.
    return str == nullptr || str[0] == '\0';
}
/*! \brief
 * Tests whether a string starts with another string.
 *
 * \param[in] str    String to process.
 * \param[in] prefix Prefix to find.
 * \returns   true if \p str starts with \p prefix.
 *
 * Returns true if \p prefix is empty.
 */
static inline bool startsWith(const std::string& str, const std::string& prefix)
{
    // compare() clamps the inspected range to the length of str, so a
    // prefix longer than str simply compares unequal.
    return str.compare(0, prefix.length(), prefix) == 0;
}
//! \copydoc startsWith(const std::string &, const std::string &)
static inline bool startsWith(const char* str, const char* prefix)
{
    // Neither pointer is checked for null here; both are passed straight
    // to the C library string routines.
    return std::strncmp(str, prefix, std::strlen(prefix)) == 0;
}
/*! \brief
 * Tests whether a string ends with another string.
 *
 * \param[in] str    String to process.
 * \param[in] suffix Suffix to find.
 * \returns   true if \p str ends with \p suffix.
 *
 * Returns true if \p suffix is NULL or empty.
 */
bool endsWith(const char* str, const char* suffix);
//! \copydoc endsWith(const char *, const char *)
static inline bool endsWith(const std::string& str, const char* suffix)
{
    // Forward to the C-string overload declared above.
    return endsWith(str.c_str(), suffix);
}
/*! \brief
 * Tests whether a string contains another as a substring.
 *
 * \param[in] str    String to process.
 * \param[in] substr Substring to find.
 * \returns   true if \p str contains \p substr.
 */
static inline bool contains(const std::string& str, const char* substr)
{
    // \p substr is handed directly to std::string::find(), which requires
    // a valid zero-terminated string.
    return str.find(substr) != std::string::npos;
}
//! \copydoc contains(const std::string &str, const char *substr)
static inline bool contains(const std::string& str, const std::string& substr)
{
    return str.find(substr) != std::string::npos;
}
125 /*!\brief Returns number of space-separated words in zero-terminated char ptr
127 * \param s Character pointer to a zero-terminated string, which will not be changed.
129 * \returns number of words in string.
131 * \note This routine is mainly meant to support legacy code in GROMACS. For
132 * new source you should try hard to use C++ string objects instead.
134 std::size_t countWords(const char* s);
136 /*!\brief Returns the number of space-separated words in a string object
138 * \param str Reference to string object, which will not be changed.
140 * \returns number of words in string.
142 std::size_t countWords(const std::string& str);
//! \copydoc endsWith(const std::string &str, const char *suffix)
static inline bool endsWith(const std::string& str, const std::string& suffix)
{
    // Delegates to the (const std::string&, const char*) overload.
    return endsWith(str, suffix.c_str());
}
151 * Removes a suffix from a string.
153 * \param[in] str String to process.
154 * \param[in] suffix Suffix to remove.
155 * \returns \p str with \p suffix removed, or \p str unmodified if it does
156 * not end with \p suffix.
157 * \throws std::bad_alloc if out of memory.
159 * Returns \p str if \p suffix is NULL or empty.
161 std::string stripSuffixIfPresent(const std::string& str, const char* suffix);
163 * Removes leading and trailing whitespace from a string.
165 * \param[in] str String to process.
166 * \returns \p str with leading and trailing whitespaces removed.
167 * \throws std::bad_alloc if out of memory.
169 std::string stripString(const std::string& str);
171 # define gmx_format(archetype, string_index, first_to_check) \
172 __attribute__((format(archetype, string_index, first_to_check)))
174 /*! \brief GCC like function format attribute
176 * The format attribute specifies that a function takes printf, scanf, ...
177 * style arguments that should be type-checked against a format string.
178 * The attribute has to be placed after the function.
179 * This attribute is only valid for function declarations and not function
180 * definitions (GCC limitation). For member functions the implicit `this`
181 * pointer is included in the argument count.
183 # define gmx_format(archetype, string_index, first_to_check)
186 # define gmx_fmtstr _In_ _Printf_format_string_
188 /*! \brief MSVC like function format attribute
190 * Does type checking for printf like format strings in MSVC style.
191 * Attribute has to be placed before format string.
196 * Formats a string (snprintf() wrapper).
198 * \throws std::bad_alloc if out of memory.
200 * This function works like sprintf(), except that it returns an std::string
201 * instead of requiring a preallocated buffer. Arbitrary length output is supported.
204 std::string formatString(gmx_fmtstr const char* fmt, ...) gmx_format(printf, 1, 2);
207 * Formats a string (vsnprintf() wrapper).
209 * \throws std::bad_alloc if out of memory.
211 * This function works like vsprintf(), except that it returns an std::string
212 * instead of requiring a preallocated buffer. Arbitrary length output is supported.
215 std::string formatStringV(const char* fmt, va_list ap);
/*! \brief Function object that wraps a call to formatString() that
 * expects a single conversion argument, for use with algorithms. */
class StringFormatter
{
public:
    /*! \brief Constructor
     *
     * \param[in] format The printf-style format string that will
     *     be applied to convert values of type T to
     *     string. Exactly one argument to the conversion
     *     specification(s) in `format` is supported.
     *
     * Only the pointer is stored, so the format string must remain valid
     * for as long as the formatter is used. */
    explicit StringFormatter(const char* format) : format_(format) {}

    /*! \brief Implements the formatting functionality
     *
     * \tparam    T     Type of the value to format.
     * \param[in] value Value passed as the single conversion argument.
     * \returns   Result of formatString() applied to the stored format
     *     string and \p value. */
    template<typename T>
    std::string operator()(const T& value) const
    {
        return formatString(format_, value);
    }

private:
    //! Format string to use
    const char* format_;
};
/*! \brief Function object to implement the same interface as
 * `StringFormatter` to use with strings that should not be formatted
 * further. */
class IdentityFormatter
{
public:
    /*! \brief Implements the formatting non-functionality
     *
     * \param[in] value String to pass through.
     * \returns   An unmodified copy of \p value. */
    std::string operator()(const std::string& value) const { return value; }
};
/*! \brief Formats all the range as strings, and then joins them with
 * a separator in between.
 *
 * \param[in] begin      Iterator to the beginning of the range to join.
 * \param[in] end        Iterator to the end of the range to join.
 * \param[in] separator  String to put in between the joined strings.
 * \param[in] formatter  Function object to format the objects in
 *     the range as strings
 * \returns   All objects in the range from `begin` to `end` formatted
 *     as strings and concatenated with `separator` between each pair.
 * \throws    std::bad_alloc if out of memory.
 */
template<typename InputIterator, typename FormatterType>
std::string formatAndJoin(InputIterator begin, InputIterator end, const char* separator, const FormatterType& formatter)
{
    std::string result;
    // Start with an empty separator so that nothing precedes the first
    // element; it is switched to the real separator after each element.
    const char* currentSeparator = "";
    for (InputIterator i = begin; i != end; ++i)
    {
        result.append(currentSeparator);
        result.append(formatter(*i));
        currentSeparator = separator;
    }
    return result;
}
/*! \brief Formats all elements of the container as strings, and then
 * joins them with a separator in between.
 *
 * \param[in] container  Objects to join.
 * \param[in] separator  String to put in between the joined strings.
 * \param[in] formatter  Function object to format the objects in
 *     `container` as strings
 * \returns   All objects from `container` formatted as strings and
 *     concatenated with `separator` between each pair.
 * \throws    std::bad_alloc if out of memory.
 */
template<typename ContainerType, typename FormatterType>
std::string formatAndJoin(const ContainerType& container, const char* separator, const FormatterType& formatter)
{
    // Delegate to the iterator-range overload over the whole container.
    return formatAndJoin(container.begin(), container.end(), separator, formatter);
}
296 * Joins strings from a range with a separator in between.
298 * \param[in] begin Iterator the beginning of the range to join.
299 * \param[in] end Iterator the end of the range to join.
300 * \param[in] separator String to put in between the joined strings.
301 * \returns All strings from (`begin`, `end`) concatenated with `separator`
303 * \throws std::bad_alloc if out of memory.
305 template<typename InputIterator>
306 std::string joinStrings(InputIterator begin, InputIterator end, const char* separator)
308 return formatAndJoin(begin, end, separator, IdentityFormatter());
/*! \brief
 * Joins strings from a container with a separator in between.
 *
 * \param[in] container  Strings to join.
 * \param[in] separator  String to put in between the joined strings.
 * \returns   All strings from `container` concatenated with `separator`
 *     between each pair.
 * \throws    std::bad_alloc if out of memory.
 */
template<typename ContainerType>
std::string joinStrings(const ContainerType& container, const char* separator)
{
    // Delegate to the iterator-range overload over the whole container.
    return joinStrings(container.begin(), container.end(), separator);
}
/*! \brief
 * Joins strings from an array with a separator in between.
 *
 * \param[in] array      Array of strings to join.
 * \param[in] separator  String to put in between the joined strings.
 * \tparam    count      Deduced number of elements in \p array.
 * \returns   All strings from `array` concatenated with `separator`
 *     between each pair.
 * \throws    std::bad_alloc if out of memory.
 */
template<size_t count>
std::string joinStrings(const char* const (&array)[count], const char* separator)
{
    // The array decays to a pointer, giving the range [array, array + count).
    return joinStrings(array, array + count, separator);
}
343 * Splits a string to whitespace separated tokens.
345 * \param[in] str String to process.
346 * \returns \p str split into tokens at each whitespace sequence.
347 * \throws std::bad_alloc if out of memory.
349 * This function works like `split` in Python, i.e., leading and trailing
350 * whitespace is ignored, and consecutive whitespaces are treated as a single separator.
353 std::vector<std::string> splitString(const std::string& str);
355 * Splits a string to tokens separated by a given delimiter.
357 * \param[in] str String to process.
358 * \param[in] delim Delimiter to use for splitting.
359 * \returns \p str split into tokens at delimiter.
360 * \throws std::bad_alloc if out of memory.
362 * Unlike splitString(), consecutive delimiters will generate empty tokens, as
363 * will leading or trailing delimiters.
364 * Empty input will return an empty vector.
366 std::vector<std::string> splitDelimitedString(const std::string& str, char delim);
368 * Splits \c str to tokens separated by delimiter \c delim. Removes
369 * leading and trailing whitespace from those strings with std::isspace.
371 * \param[in] str String to process.
372 * \param[in] delim Delimiter to use for splitting.
373 * \returns \p str split into tokens at delimiter, with whitespace stripped.
374 * \throws std::bad_alloc if out of memory.
376 * Unlike splitString(), consecutive delimiters will generate empty tokens, as
377 * will leading or trailing delimiters.
378 * Empty input will return an empty vector.
379 * Input with only whitespace will return a vector of size 1,
380 * that contains an empty token.
382 std::vector<std::string> splitAndTrimDelimitedString(const std::string& str, char delim);
385 * Replace all occurrences of a string with another string.
387 * \param[in] input Input string.
388 * \param[in] from String to find.
389 * \param[in] to String to use to replace \p from.
390 * \returns Copy of \p input with all occurrences of \p from replaced with \p to.
391 * \throws std::bad_alloc if out of memory.
393 * The replacement is greedy and not recursive: starting from the beginning of
394 * \p input, each match of \p from is replaced with \p to, and the search for
395 * the next match begins after the end of the previous match.
397 * Complexity is O(N), where N is length of output.
399 * \see replaceAllWords()
401 std::string replaceAll(const std::string& input, const char* from, const char* to);
402 //! \copydoc replaceAll(const std::string &, const char *, const char *)
403 std::string replaceAll(const std::string& input, const std::string& from, const std::string& to);
405 * Replace whole words with others.
407 * \param[in] input Input string.
408 * \param[in] from String to find.
409 * \param[in] to String to use to replace \p from.
410 * \returns Copy of \p input with all \p from words replaced with \p to.
411 * \throws std::bad_alloc if out of memory.
413 * Works as replaceAll(), but a match is only considered if it is delimited by
414 * non-alphanumeric characters.
418 std::string replaceAllWords(const std::string& input, const char* from, const char* to);
419 //! \copydoc replaceAllWords(const std::string &, const char *, const char *)
420 std::string replaceAllWords(const std::string& input, const std::string& from, const std::string& to);
422 /*! \brief Return whether two strings are equal, ignoring case.
424 * Checks if two strings have the same length and if all characters
425 * in them match when compared case insensitive.
426 * Characters are converted by using std::tolower.
428 * \param[in] source Search string to compare against \p target.
429 * \param[in] target String to be matched to \p source.
430 * \returns True if the strings match.
432 bool equalCaseInsensitive(const std::string& source, const std::string& target);
435 * Checks if at most \p maxLengthOfComparison characters of two strings match case insensitive.
437 * The function tests two strings \p source and \p target to see if at most
438 * \p maxLengthOfComparison characters match between the two. If fewer characters are present
439 * in \p source, only the maximum number of characters in \p source will be compared instead.
440 * In this case both \p source and \p target also need to have the same length, or the strings will
441 * compare as false, even if \p target matches \p source over the length of \p source.
443 * If \p maxLengthOfComparison is 0, the function always returns true.
444 * Characters are converted by using std::tolower.
446 * \param[in] source Search string to compare against \p target.
447 * \param[in] target String to be matched to \p source.
448 * \param[in] maxLengthOfComparison The maximum string length to compare.
449 * \returns True if the strings match.
451 bool equalCaseInsensitive(const std::string& source, const std::string& target, size_t maxLengthOfComparison);
453 class TextLineWrapper;
/*! \brief
 * Stores settings for line wrapping.
 *
 * Methods in this class do not throw.
 *
 * \see TextLineWrapper
 */
class TextLineWrapperSettings
{
public:
    /*! \brief
     * Initializes default wrapper settings.
     *
     * Default settings are:
     *  - No maximum line width (only explicit line breaks).
     *  - No indentation.
     *  - No continuation characters.
     *  - Do not keep final spaces in input strings.
     */
    TextLineWrapperSettings();

    /*! \brief
     * Sets the maximum length for output lines.
     *
     * \param[in] length  Maximum length for the lines after wrapping.
     *
     * If this method is not called, or is called with zero \p length, the
     * wrapper has no maximum length (only wraps at explicit line breaks).
     */
    void setLineLength(int length) { maxLength_ = length; }
    /*! \brief
     * Sets the indentation for output lines.
     *
     * \param[in] indent  Number of spaces to add for indentation.
     *
     * If this method is not called, the wrapper does not add indentation.
     */
    void setIndent(int indent) { indent_ = indent; }
    /*! \brief
     * Sets the indentation for first output line after a line break.
     *
     * \param[in] indent  Number of spaces to add for indentation.
     *
     * If this method is not called, or called with \p indent equal to -1,
     * the value set with setIndent() is used.
     */
    void setFirstLineIndent(int indent) { firstLineIndent_ = indent; }
    /*! \brief
     * Sets whether final spaces in input should be kept.
     *
     * \param[in] bKeep  Whether to keep spaces at the end of the input.
     *
     * This means that wrapping a string that ends in spaces also keeps
     * those spaces in the output.  This allows using the wrapper for
     * partial lines where the initial part of the line may end in a space.
     * By default, all trailing whitespace is removed.  Note that this
     * option does not affect spaces before an explicit newline.
     */
    void setKeepFinalSpaces(bool bKeep) { bKeepFinalSpaces_ = bKeep; }
    /*! \brief
     * Sets a continuation marker for wrapped lines.
     *
     * \param[in] continuationChar  Character to use to mark continuation
     *     lines.
     *
     * If set to non-zero character code, this character is added at the
     * end of each line where a line break is added by TextLineWrapper
     * (but not after lines produced by explicit line breaks).
     * The default (\c '\0') is to not add continuation markers.
     *
     * Note that currently, the continuation char may cause the output line
     * length to exceed the value set with setLineLength() by at most two
     * characters.
     */
    void setContinuationChar(char continuationChar) { continuationChar_ = continuationChar; }

    //! Returns the maximum length set with setLineLength().
    int lineLength() const { return maxLength_; }
    //! Returns the indentation set with setIndent().
    int indent() const { return indent_; }
    /*! \brief
     * Returns the indentation set with setFirstLineIndent().
     *
     * If setFirstLineIndent() has not been called or has been called with
     * -1, indent() is returned.
     */
    int firstLineIndent() const { return (firstLineIndent_ >= 0 ? firstLineIndent_ : indent_); }

private:
    //! Maximum length of output lines, or <= 0 if no limit.
    int maxLength_;
    //! Number of spaces to indent each output line with.
    int indent_;
    /*! \brief
     * Number of spaces to indent the first line after a newline.
     *
     * If -1, \a indent_ is used.
     */
    int firstLineIndent_;
    //! Whether to keep spaces at end of input.
    bool bKeepFinalSpaces_;
    //! If not \c '\0', mark each wrapping point with this character.
    char continuationChar_;

    //! Needed to access the members.
    friend class TextLineWrapper;
};
567 * Wraps lines to a predefined length.
569 * This utility class wraps lines at word breaks to produce lines that are not
570 * longer than a predefined length. Explicit newlines ('\\n') are preserved.
571 * Only space is considered a word separator. If a single word exceeds the
572 * maximum line length, it is still printed on a single line.
573 * Extra whitespace is stripped from the end of produced lines.
574 * Other options on the wrapping, such as the line length or indentation,
575 * can be changed using a TextLineWrapperSettings object.
577 * Two interfaces to do the wrapping are provided:
578 * -# High-level interface using either wrapToString() (produces a single
579 * string with embedded newlines) or wrapToVector() (produces a vector of
580 * strings with each line as one element).
581 * These methods operate on std::string and wrap the entire input string.
582 * -# Low-level interface using findNextLine() and formatLine().
583 * findNextLine() operates either on a C string or an std::string, and does
584 * not do any memory allocation (so it does not throw). It finds the next
585 * line to be wrapped, considering the wrapping settings.
586 * formatLine() does whitespace operations on the line found by
587 * findNextLine() and returns an std::string.
588 * These methods allow custom wrapping implementation to either avoid
589 * exceptions or to wrap only a part of the input string.
593 gmx::TextLineWrapper wrapper;
594 wrapper.settings().setLineLength(78);
595 printf("%s\n", wrapper.wrapToString(textToWrap).c_str());
600 class TextLineWrapper
604 * Constructs a new line wrapper with default settings.
610 * Constructs a new line wrapper with given settings.
612 * \param[in] settings Wrapping settings.
616 explicit TextLineWrapper(const TextLineWrapperSettings& settings) : settings_(settings) {}
619 * Provides access to settings of this wrapper.
621 * \returns The settings object for this wrapper.
623 * The returned object can be used to modify settings for the wrapper.
624 * All subsequent calls to wrapToString() and wrapToVector() use the
629 TextLineWrapperSettings& settings() { return settings_; }
631 //! Returns true if the wrapper would not modify the input string.
632 bool isTrivial() const;
635 * Finds the next line to be wrapped.
637 * \param[in] input String to wrap.
638 * \param[in] lineStart Index of first character of the line to find.
639 * \returns Index of first character of the next line.
641 * If this is the last line, returns the length of \p input.
642 * In determining the length of the returned line, this function
643 * considers the maximum line length, leaving space for indentation,
644 * and also whitespace stripping behavior.
645 * Thus, the line returned may be longer than the maximum line length
646 * if it has leading and/or trailing space.
647 * When wrapping a line on a space (not on an explicit line break),
648 * the returned index is always on a non-whitespace character after the
651 * To iterate over lines in a string, use the following code:
653 gmx::TextLineWrapper wrapper;
654 // <set desired wrapping settings>
655 size_t lineStart = 0;
656 size_t length = input.length();
657 while (lineStart < length)
659 size_t nextLineStart = wrapper.findNextLine(input, lineStart);
660 std::string line = wrapper.formatLine(input, lineStart, nextLineStart));
661 // <do something with the line>
662 lineStart = nextLineStart;
669 size_t findNextLine(const char* input, size_t lineStart) const;
670 //! \copydoc findNextLine(const char *, size_t)const
671 size_t findNextLine(const std::string& input, size_t lineStart) const;
673 * Formats a single line for output according to wrapping settings.
675 * \param[in] input Input string.
676 * \param[in] lineStart Index of first character of the line to format.
677 * \param[in] lineEnd Index of first character of the next line.
678 * \returns The line with leading and/or trailing whitespace removed
679 * and indentation applied.
680 * \throws std::bad_alloc if out of memory.
682 * Intended to be used on the lines found by findNextLine().
683 * When used with the lines returned from findNextLine(), the returned
684 * line conforms to the wrapper settings.
685 * Trailing whitespace is always stripped (including any newlines,
686 * i.e., the return value does not contain a newline).
688 std::string formatLine(const std::string& input, size_t lineStart, size_t lineEnd) const;
691 * Formats a string, producing a single string with all the lines.
693 * \param[in] input String to wrap.
694 * \returns \p input with added newlines such that maximum line
695 * length is not exceeded.
696 * \throws std::bad_alloc if out of memory.
698 * Newlines in the input are preserved, including terminal newlines.
699 * Note that if the input does not contain a terminal newline, the
700 * output does not either.
702 std::string wrapToString(const std::string& input) const;
704 * Formats a string, producing a vector with all the lines.
706 * \param[in] input String to wrap.
707 * \returns \p input split into lines such that maximum line length
709 * \throws std::bad_alloc if out of memory.
711 * The strings in the returned vector do not contain newlines at the
713 * Note that a single terminal newline does not affect the output:
714 * "line\\n" and "line" both produce the same output (but "line\\n\\n"
715 * produces two lines, the second of which is empty).
717 std::vector<std::string> wrapToVector(const std::string& input) const;
720 TextLineWrapperSettings settings_;