2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2011,2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Declares common string utility and formatting routines.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
41 * \ingroup module_utility
43 #ifndef GMX_UTILITY_STRINGUTIL_H
44 #define GMX_UTILITY_STRINGUTIL_H
54 //! \addtogroup module_utility
/*! \brief
 * Tests whether a string is null or empty.
 *
 * \param[in] str  C string to test; may be NULL.
 * \returns   true if \p str is NULL or points to an empty string.
 *
 * Does not throw.
 */
inline bool isNullOrEmpty(const char *str)
{
    // The NULL test comes first so that str is never dereferenced when null.
    return str == NULL || str[0] == '\0';
}
/*! \brief
 * Tests whether a string starts with another string.
 *
 * \param[in] str    String to process.
 * \param[in] prefix Prefix to find.
 * \returns   true if \p str starts with \p prefix.
 *
 * Returns true if \p prefix is empty.
 */
inline bool startsWith(const std::string &str, const std::string &prefix)
{
    // compare() clamps the compared range to the length of str, so a prefix
    // that is longer than str can never match.
    return str.compare(0, prefix.length(), prefix) == 0;
}
/*! \brief
 * Tests whether a C string starts with another C string.
 *
 * \param[in] str    String to process.
 * \param[in] prefix Prefix to find.
 * \returns   true if \p str starts with \p prefix.
 *
 * Returns true if \p prefix is empty.
 * Neither \p str nor \p prefix is checked for NULL; both are passed
 * directly to std::strncmp() / std::strlen().
 */
inline bool startsWith(const char *str, const char *prefix)
{
    // Comparing at most strlen(prefix) characters stops at the terminator of
    // str if str is the shorter of the two, so the read never runs past it.
    return std::strncmp(str, prefix, std::strlen(prefix)) == 0;
}
88 * Tests whether a string ends with another string.
90 * \param[in] str String to process.
91 * \param[in] suffix Suffix to find.
92 * \returns true if \p str ends with \p suffix.
94 * Returns true if \p suffix is NULL or empty.
97 bool endsWith(const std::string &str, const char *suffix);
100 * Removes a suffix from a string.
102 * \param[in] str String to process.
103 * \param[in] suffix Suffix to remove.
104 * \returns \p str with \p suffix removed, or \p str unmodified if it does
105 * not end with \p suffix.
106 * \throws std::bad_alloc if out of memory.
108 * Returns \p str if \p suffix is NULL or empty.
110 std::string stripSuffixIfPresent(const std::string &str, const char *suffix);
112 * Removes leading and trailing whitespace from a string.
114 * \param[in] str String to process.
115 * \returns \p str with leading and trailing whitespaces removed.
116 * \throws std::bad_alloc if out of memory.
118 std::string stripString(const std::string &str);
121 * Formats a string (snprintf() wrapper).
123 * \throws std::bad_alloc if out of memory.
125 * This function works like sprintf(), except that it returns an std::string
126 * instead of requiring a preallocated buffer. Arbitrary length output is
129 std::string formatString(const char *fmt, ...);
/*! \brief Function object that wraps a call to formatString() that
 * expects a single conversion argument, for use with algorithms.
 *
 * Only the pointer to the format string is stored, not a copy of the
 * string, so the format string must outlive the formatter. */
class StringFormatter
{
    public:
        /*! \brief Constructor
         *
         * \param[in] format  The printf-style format string that will
         *     be applied to convert values of type T to
         *     string. Exactly one argument to the conversion
         *     specification(s) in `format` is supported. */
        explicit StringFormatter(const char *format) : format_(format)
        {
        }

        /*! \brief Implements the formatting functionality
         *
         * \param[in] value  Value passed to formatString() as the single
         *     argument for the stored format string.
         * \returns   Result of formatString() for \p value. */
        template <typename T>
        std::string operator()(const T &value) const
        {
            return formatString(format_, value);
        }

    private:
        //! Format string to use
        const char *format_;
};
/*! \brief Function object to implement the same interface as
 * `StringFormatter` to use with strings that should not be formatted
 * further. */
class IdentityFormatter
{
    public:
        /*! \brief Implements the formatting non-functionality
         *
         * \param[in] value  String to pass through.
         * \returns   Copy of \p value, unchanged. */
        std::string operator()(const std::string &value) const
        {
            return value;
        }
};
/*! \brief Formats all the range as strings, and then joins them with
 * a separator in between.
 *
 * \param[in] begin      Iterator to the beginning of the range to join.
 * \param[in] end        Iterator to the end of the range to join.
 * \param[in] separator  String to put in between the joined strings.
 * \param[in] formatter  Function object to format the objects in
 *                       the range as strings
 * \returns   All objects in the range from `begin` to `end` formatted
 *            as strings and concatenated with `separator` between each pair.
 * \throws    std::bad_alloc if out of memory.
 *
 * An empty range produces an empty string.
 */
template <typename InputIterator, typename FormatterType>
std::string formatAndJoin(InputIterator begin, InputIterator end, const char *separator, const FormatterType &formatter)
{
    std::string result;
    // Starts out empty so that nothing precedes the first element; it is
    // switched to the real separator once an element has been appended.
    const char *currentSeparator = "";
    for (InputIterator i = begin; i != end; ++i)
    {
        result.append(currentSeparator);
        result.append(formatter(*i));
        currentSeparator = separator;
    }
    return result;
}
/*! \brief Formats all elements of the container as strings, and then
 * joins them with a separator in between.
 *
 * \param[in] container  Objects to join.
 * \param[in] separator  String to put in between the joined strings.
 * \param[in] formatter  Function object to format the objects in
 *                       `container` as strings
 * \returns   All objects from `container` formatted as strings and
 *            concatenated with `separator` between each pair.
 * \throws    std::bad_alloc if out of memory.
 *
 * Convenience overload that forwards `container.begin()` and
 * `container.end()` to the iterator-range overload.
 */
template <typename ContainerType, typename FormatterType>
std::string formatAndJoin(const ContainerType &container, const char *separator, const FormatterType &formatter)
{
    return formatAndJoin(container.begin(), container.end(), separator, formatter);
}
215 * Joins strings from a range with a separator in between.
217 * \param[in] begin Iterator the beginning of the range to join.
218 * \param[in] end Iterator the end of the range to join.
219 * \param[in] separator String to put in between the joined strings.
220 * \returns All strings from (`begin`, `end`) concatenated with `separator`
222 * \throws std::bad_alloc if out of memory.
224 template <typename InputIterator>
225 std::string joinStrings(InputIterator begin, InputIterator end,
226 const char *separator)
228 return formatAndJoin(begin, end, separator, IdentityFormatter());
/*! \brief
 * Joins strings from a container with a separator in between.
 *
 * \param[in] container  Strings to join.
 * \param[in] separator  String to put in between the joined strings.
 * \returns   All strings from `container` concatenated with `separator`
 *            between each pair.
 * \throws    std::bad_alloc if out of memory.
 *
 * Convenience overload that forwards `container.begin()` and
 * `container.end()` to the iterator-range overload.
 */
template <typename ContainerType>
std::string joinStrings(const ContainerType &container, const char *separator)
{
    return joinStrings(container.begin(), container.end(), separator);
}
247 * Joins strings in an array to a single string.
249 * \param[in] sarray Array of strings to concatenate.
250 * \param[in] count Number of elements in \p sarray to concatenate.
251 * \returns All strings in \p sarray joined, ensuring at least one space
252 * between the strings.
253 * \throws std::bad_alloc if out of memory.
255 * The strings in the \p sarray array are concatenated, adding a single space
256 * between the strings if there is no whitespace at the end of a string.
257 * Terminal whitespace is removed.
259 std::string concatenateStrings(const char * const *sarray, size_t count);
/*! \brief
 * Convenience overload for joining strings in a C array (static data).
 *
 * \param[in] sarray  Array of strings to concatenate.
 * \tparam    count   Deduced number of elements in \p sarray.
 * \returns   All strings in \p sarray joined, ensuring at least one space
 *            between the strings.
 * \throws    std::bad_alloc if out of memory.
 *
 * Taking the array by reference lets the compiler deduce \p count, which
 * is then forwarded to the pointer-and-count overload.
 *
 * \see concatenateStrings(const char * const *, size_t)
 */
template <size_t count>
std::string concatenateStrings(const char * const (&sarray)[count])
{
    return concatenateStrings(sarray, count);
}
278 * Splits a string to whitespace separated tokens.
280 * \param[in] str String to process.
281 * \returns \p str split into tokens at each whitespace sequence.
282 * \throws std::bad_alloc if out of memory.
284 * This function works like `split` in Python, i.e., leading and trailing
285 * whitespace is ignored, and consecutive whitespaces are treated as a single
288 std::vector<std::string> splitString(const std::string &str);
291 * Replace all occurrences of a string with another string.
293 * \param[in] input Input string.
294 * \param[in] from String to find.
295 * \param[in] to String to use to replace \p from.
296 * \returns Copy of \p input with all occurrences of \p from replaced with \p to.
297 * \throws std::bad_alloc if out of memory.
299 * The replacement is greedy and not recursive: starting from the beginning of
300 * \p input, each match of \p from is replaced with \p to, and the search for
301 * the next match begins after the end of the previous match.
303 * Complexity is O(N), where N is length of output.
305 * \see replaceAllWords()
307 std::string replaceAll(const std::string &input,
308 const char *from, const char *to);
309 //! \copydoc replaceAll(const std::string &, const char *, const char *)
310 std::string replaceAll(const std::string &input,
311 const std::string &from, const std::string &to);
313 * Replace whole words with others.
315 * \param[in] input Input string.
316 * \param[in] from String to find.
317 * \param[in] to String to use to replace \p from.
318 * \returns Copy of \p input with all \p from words replaced with \p to.
319 * \throws std::bad_alloc if out of memory.
321 * Works as replaceAll(), but a match is only considered if it is delimited by
322 * non-alphanumeric characters.
326 std::string replaceAllWords(const std::string &input,
327 const char *from, const char *to);
328 //! \copydoc replaceAllWords(const std::string &, const char *, const char *)
329 std::string replaceAllWords(const std::string &input,
330 const std::string &from, const std::string &to);
332 class TextLineWrapper;
/*! \brief
 * Stores settings for line wrapping.
 *
 * Methods in this class do not throw.
 *
 * \see TextLineWrapper
 */
class TextLineWrapperSettings
{
    public:
        /*! \brief
         * Initializes default wrapper settings.
         *
         * Default settings are:
         *  - No maximum line width (only explicit line breaks).
         *  - No indentation.
         *  - No continuation characters.
         *  - Ignore whitespace after an explicit newline.
         */
        TextLineWrapperSettings();

        /*! \brief
         * Sets the maximum length for output lines.
         *
         * \param[in] length  Maximum length for the lines after wrapping.
         *
         * If this method is not called, or is called with zero \p length, the
         * wrapper has no maximum length (only wraps at explicit line breaks).
         */
        void setLineLength(int length) { maxLength_ = length; }
        /*! \brief
         * Sets the indentation for output lines.
         *
         * \param[in] indent  Number of spaces to add for indentation.
         *
         * If this method is not called, the wrapper does not add indentation.
         */
        void setIndent(int indent) { indent_ = indent; }
        /*! \brief
         * Sets the indentation for first output line after a line break.
         *
         * \param[in] indent  Number of spaces to add for indentation.
         *
         * If this method is not called, or called with \p indent equal to -1,
         * the value set with setIndent() is used.
         */
        void setFirstLineIndent(int indent) { firstLineIndent_ = indent; }
        /*! \brief
         * Sets whether to remove spaces after an explicit newline.
         *
         * \param[in] bStrip  If true, spaces after newline are ignored.
         *
         * If not removed, the space is added to the indentation set with
         * setIndent().
         * The default is to strip such whitespace.
         */
        void setStripLeadingWhitespace(bool bStrip)
        {
            bStripLeadingWhitespace_ = bStrip;
        }
        /*! \brief
         * Sets a continuation marker for wrapped lines.
         *
         * \param[in] continuationChar  Character to use to mark continuation
         *                              lines.
         *
         * If set to non-zero character code, this character is added at the
         * end of each line where a line break is added by TextLineWrapper
         * (but not after lines produced by explicit line breaks).
         * The default (\c '\0') is to not add continuation markers.
         *
         * Note that currently, the continuation char may cause the output line
         * length to exceed the value set with setLineLength() by at most two
         * characters.
         */
        void setContinuationChar(char continuationChar)
        {
            continuationChar_ = continuationChar;
        }

        //! Returns the maximum length set with setLineLength().
        int lineLength() const { return maxLength_; }
        //! Returns the indentation set with setIndent().
        int indent() const { return indent_; }
        /*! \brief
         * Returns the indentation set with setFirstLineIndent().
         *
         * If setFirstLineIndent() has not been called or has been called with
         * -1, indent() is returned.
         */
        int firstLineIndent() const
        {
            // Any negative stored value selects the general indentation.
            return (firstLineIndent_ >= 0 ? firstLineIndent_ : indent_);
        }

    private:
        //! Maximum length of output lines, or <= 0 if no limit.
        int  maxLength_;
        //! Number of spaces to indent each output line with.
        int  indent_;
        /*! \brief
         * Number of spaces to indent the first line after a newline.
         *
         * If -1, \a indent_ is used.
         */
        int  firstLineIndent_;
        //! Whether to ignore or preserve space after a newline.
        bool bStripLeadingWhitespace_;
        //! If not \c '\0', mark each wrapping point with this character.
        char continuationChar_;

        //! Needed to access the members.
        friend class TextLineWrapper;
};
452 * Wraps lines to a predefined length.
454 * This utility class wraps lines at word breaks to produce lines that are not
455 * longer than a predefined length. Explicit newlines ('\\n') are preserved.
456 * Only space is considered a word separator. If a single word exceeds the
457 * maximum line length, it is still printed on a single line.
458 * Extra whitespace is stripped from the end of produced lines.
459 * Other options on the wrapping, such as the line length or indentation,
460 * can be changed using a TextLineWrapperSettings object.
462 * Two interfaces to do the wrapping are provided:
463 * -# High-level interface using either wrapToString() (produces a single
464 * string with embedded newlines) or wrapToVector() (produces a vector of
465 * strings with each line as one element).
466 * These methods operate on std::string and wrap the entire input string.
467 * -# Low-level interface using findNextLine() and formatLine().
468 * findNextLine() operates either on a C string or an std::string, and does
469 * not do any memory allocation (so it does not throw). It finds the next
470 * line to be wrapped, considering the wrapping settings.
471 * formatLine() does whitespace operations on the line found by
472 * findNextLine() and returns an std::string.
473 * These methods allow custom wrapping implementation to either avoid
474 * exceptions or to wrap only a part of the input string.
478 gmx::TextLineWrapper wrapper;
479 wrapper.settings().setLineLength(78);
480 printf("%s\n", wrapper.wrapToString(textToWrap).c_str());
485 class TextLineWrapper
489 * Constructs a new line wrapper with default settings.
497 * Constructs a new line wrapper with given settings.
499 * \param[in] settings Wrapping settings.
503 explicit TextLineWrapper(const TextLineWrapperSettings &settings)
504 : settings_(settings)
509 * Provides access to settings of this wrapper.
511 * \returns The settings object for this wrapper.
513 * The returned object can be used to modify settings for the wrapper.
514 * All subsequent calls to wrapToString() and wrapToVector() use the
519 TextLineWrapperSettings &settings() { return settings_; }
522 * Finds the next line to be wrapped.
524 * \param[in] input String to wrap.
525 * \param[in] lineStart Index of first character of the line to find.
526 * \returns Index of first character of the next line.
528 * If this is the last line, returns the length of \p input.
529 * In determining the length of the returned line, this function
530 * considers the maximum line length, leaving space for indentation,
531 * and also whitespace stripping behavior.
532 * Thus, the line returned may be longer than the maximum line length
533 * if it has leading and/or trailing space.
534 * When wrapping a line on a space (not on an explicit line break),
535 * the returned index is always on a non-whitespace character after the
538 * To iterate over lines in a string, use the following code:
540 gmx::TextLineWrapper wrapper;
541 // <set desired wrapping settings>
542 size_t lineStart = 0;
543 size_t length = input.length();
544 while (lineStart < length)
546 size_t nextLineStart = wrapper.findNextLine(input, lineStart);
547 std::string line = wrapper.formatLine(input, lineStart, nextLineStart);
548 // <do something with the line>
549 lineStart = nextLineStart;
556 size_t findNextLine(const char *input, size_t lineStart) const;
557 //! \copydoc findNextLine(const char *, size_t)const
558 size_t findNextLine(const std::string &input, size_t lineStart) const;
560 * Formats a single line for output according to wrapping settings.
562 * \param[in] input Input string.
563 * \param[in] lineStart Index of first character of the line to format.
564 * \param[in] lineEnd Index of first character of the next line.
565 * \returns The line with leading and/or trailing whitespace removed
566 * and indentation applied.
567 * \throws std::bad_alloc if out of memory.
569 * Intended to be used on the lines found by findNextLine().
570 * When used with the lines returned from findNextLine(), the returned
571 * line conforms to the wrapper settings.
572 * Trailing whitespace is always stripped (including any newlines,
573 * i.e., the return value does not contain a newline).
575 std::string formatLine(const std::string &input,
576 size_t lineStart, size_t lineEnd) const;
579 * Formats a string, producing a single string with all the lines.
581 * \param[in] input String to wrap.
582 * \returns \p input with added newlines such that maximum line
583 * length is not exceeded.
584 * \throws std::bad_alloc if out of memory.
586 * Newlines in the input are preserved, including terminal newlines.
587 * Note that if the input does not contain a terminal newline, the
588 * output does not either.
590 std::string wrapToString(const std::string &input) const;
592 * Formats a string, producing a vector with all the lines.
594 * \param[in] input String to wrap.
595 * \returns \p input split into lines such that maximum line length
597 * \throws std::bad_alloc if out of memory.
599 * The strings in the returned vector do not contain newlines at the
601 * Note that a single terminal newline does not affect the output:
602 * "line\\n" and "line" both produce the same output (but "line\\n\\n"
603 * produces two lines, the second of which is empty).
605 std::vector<std::string> wrapToVector(const std::string &input) const;
608 TextLineWrapperSettings settings_;