2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2011,2012,2013, by the GROMACS development team, led by
5 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
6 * others, as listed in the AUTHORS file in the top-level source
7 * directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Declares common string utility and formatting routines.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
41 * \ingroup module_utility
43 #ifndef GMX_UTILITY_STRINGUTIL_H
44 #define GMX_UTILITY_STRINGUTIL_H
54 //! \addtogroup module_utility
/*! \brief
 * Checks whether one string begins with another.
 *
 * \param[in] str     String to examine.
 * \param[in] prefix  Prefix to look for at the start of \p str.
 * \returns   true if \p str begins with \p prefix.
 *
 * An empty \p prefix matches any \p str, so true is returned in that case.
 */
inline bool startsWith(const std::string &str, const std::string &prefix)
{
    // Compare only the leading prefix-sized slice of str.  compare() clamps
    // the slice to the end of str, so a str shorter than prefix compares
    // unequal instead of reading out of range.
    const std::string::size_type prefixLength = prefix.length();
    const int                    comparison   = str.compare(0, prefixLength, prefix);
    return comparison == 0;
}
/*! \brief
 * Checks whether one C string begins with another.
 *
 * \param[in] str     NUL-terminated string to examine.
 * \param[in] prefix  NUL-terminated prefix to look for at the start of \p str.
 * \returns   true if \p str begins with \p prefix.
 *
 * An empty \p prefix matches any \p str.  Neither pointer is checked for
 * NULL before it is handed to the C library.
 */
inline bool startsWith(const char *str, const char *prefix)
{
    // Limiting the comparison to the length of prefix ignores the rest of
    // str; if str is shorter than prefix, its terminator makes the
    // comparison unequal before the limit is reached.
    const std::size_t prefixLength = std::strlen(prefix);
    return 0 == std::strncmp(str, prefix, prefixLength);
}
78 * Tests whether a string ends with another string.
80 * \param[in] str String to process.
81 * \param[in] suffix Suffix to find.
82 * \returns true if \p str ends with \p suffix.
84 * Returns true if \p suffix is NULL or empty.
87 bool endsWith(const std::string &str, const char *suffix);
90 * Removes a suffix from a string.
92 * \param[in] str String to process.
93 * \param[in] suffix Suffix to remove.
94 * \returns \p str with \p suffix removed, or \p str unmodified if it does
95 * not end with \p suffix.
96 * \throws std::bad_alloc if out of memory.
98 * Returns \p str if \p suffix is NULL or empty.
100 std::string stripSuffixIfPresent(const std::string &str, const char *suffix);
103 * Format a string (snprintf() wrapper).
105 * \throws std::bad_alloc if out of memory.
107 * This function works like sprintf(), except that it returns an std::string
108 * instead of requiring a preallocated buffer. Arbitrary length output is supported.
111 std::string formatString(const char *fmt, ...);
114 * Joins strings in an array to a single string.
116 * \param[in] sarray Array of strings to concatenate.
117 * \param[in] count Number of elements in \p sarray to concatenate.
118 * \returns All strings in \p sarray joined, ensuring at least one space
119 * between the strings.
120 * \throws std::bad_alloc if out of memory.
122 * The strings in the \p sarray array are concatenated, adding a single space
123 * between the strings if there is no whitespace at the end of a string.
124 * Terminal whitespace is removed.
126 std::string concatenateStrings(const char * const *sarray, size_t count);
/*! \brief
 * Convenience overload that joins the strings of a C array (static data).
 *
 * \param[in] sarray  Array of strings to concatenate.
 * \tparam    count   Number of elements in \p sarray, deduced from the
 *      array type.
 * \returns   All strings in \p sarray joined, ensuring at least one space
 *      between the strings.
 * \throws    std::bad_alloc if out of memory.
 *
 * Forwards to the pointer-and-count overload, passing the deduced element
 * count.
 *
 * \see concatenateStrings(const char * const *, size_t)
 */
template <size_t count>
std::string concatenateStrings(const char * const (&sarray)[count])
{
    // Hand over the address of the first element together with the
    // compile-time length; the pointer overload does the actual joining.
    return concatenateStrings(&sarray[0], count);
}
145 * Replace all occurrences of a string with another string.
147 * \param[in] input Input string.
148 * \param[in] from String to find.
149 * \param[in] to String to use to replace \p from.
150 * \returns Copy of \p input with all occurrences of \p from replaced with \p to.
151 * \throws std::bad_alloc if out of memory.
153 * The replacement is greedy and not recursive: starting from the beginning of
154 * \p input, each match of \p from is replaced with \p to, and the search for
155 * the next match begins after the end of the previous match.
157 * Complexity is O(N), where N is the length of the output.
159 * \see replaceAllWords()
161 std::string replaceAll(const std::string &input,
162 const char *from, const char *to);
163 //! \copydoc replaceAll(const std::string &, const char *, const char *)
164 std::string replaceAll(const std::string &input,
165 const std::string &from, const std::string &to);
167 * Replace whole words with others.
169 * \param[in] input Input string.
170 * \param[in] from String to find.
171 * \param[in] to String to use to replace \p from.
172 * \returns Copy of \p input with all \p from words replaced with \p to.
173 * \throws std::bad_alloc if out of memory.
175 * Works as replaceAll(), but a match is only considered if it is delimited by
176 * non-alphanumeric characters.
180 std::string replaceAllWords(const std::string &input,
181 const char *from, const char *to);
182 //! \copydoc replaceAllWords(const std::string &, const char *, const char *)
183 std::string replaceAllWords(const std::string &input,
184 const std::string &from, const std::string &to);
186 class TextLineWrapper;
/*! \brief
 * Stores settings for line wrapping.
 *
 * Methods in this class do not throw.
 *
 * \see TextLineWrapper
 */
class TextLineWrapperSettings
{
    public:
        /*! \brief
         * Initializes default wrapper settings.
         *
         * Default settings are:
         *  - No maximum line width (only explicit line breaks).
         *  - No continuation characters.
         *  - Ignore whitespace after an explicit newline.
         */
        TextLineWrapperSettings();

        /*! \brief
         * Sets the maximum length for output lines.
         *
         * \param[in] length  Maximum length for the lines after wrapping.
         *
         * If this method is not called, or is called with zero \p length,
         * the wrapper has no maximum length (only wraps at explicit line
         * breaks).
         */
        void setLineLength(int length)
        {
            maxLength_ = length;
        }
        /*! \brief
         * Sets the indentation for output lines.
         *
         * \param[in] indent  Number of spaces to add for indentation.
         *
         * If this method is not called, the wrapper does not add
         * indentation.
         */
        void setIndent(int indent)
        {
            indent_ = indent;
        }
        /*! \brief
         * Sets the indentation for first output line after a line break.
         *
         * \param[in] indent  Number of spaces to add for indentation.
         *
         * If this method is not called, or is called with a negative
         * \p indent (conventionally -1), the value set with setIndent() is
         * used instead; see firstLineIndent().
         */
        void setFirstLineIndent(int indent)
        {
            firstLineIndent_ = indent;
        }
        /*! \brief
         * Sets whether to remove spaces after an explicit newline.
         *
         * \param[in] bStrip  If true, spaces after newline are ignored.
         *
         * If not removed, the space is added to the indentation.
         * The default is to strip such whitespace.
         */
        void setStripLeadingWhitespace(bool bStrip)
        {
            bStripLeadingWhitespace_ = bStrip;
        }
        /*! \brief
         * Sets a continuation marker for wrapped lines.
         *
         * \param[in] continuationChar  Character to use to mark continuation.
         *
         * If set to non-zero character code, this character is added at the
         * end of each line where a line break is added by TextLineWrapper
         * (but not after lines produced by explicit line breaks).
         * The default (\c '\0') is to not add continuation markers.
         *
         * Note that currently, the continuation char may cause the output
         * line length to exceed the value set with setLineLength() by at
         * most two characters.
         */
        void setContinuationChar(char continuationChar)
        {
            continuationChar_ = continuationChar;
        }

        //! Returns the maximum length set with setLineLength().
        int lineLength() const
        {
            return maxLength_;
        }
        //! Returns the indentation set with setIndent().
        int indent() const
        {
            return indent_;
        }
        /*! \brief
         * Returns the indentation set with setFirstLineIndent().
         *
         * If setFirstLineIndent() has not been called or has been called
         * with a negative value (such as -1), indent() is returned.
         */
        int firstLineIndent() const
        {
            // A negative stored value means "not set": fall back to the
            // general indentation.
            if (firstLineIndent_ < 0)
            {
                return indent_;
            }
            return firstLineIndent_;
        }

    private:
        //! Maximum length of output lines, or <= 0 if no limit.
        int                     maxLength_;
        //! Number of spaces to indent each output line with.
        int                     indent_;
        /*! \brief
         * Number of spaces to indent the first line after a newline.
         *
         * If negative (-1 by convention), \a indent_ is used.
         */
        int                     firstLineIndent_;
        //! Whether to ignore or preserve space after a newline.
        bool                    bStripLeadingWhitespace_;
        //! If not \c '\0', mark each wrapping point with this character.
        char                    continuationChar_;

        //! Needed to access the members.
        friend class TextLineWrapper;
};
306 * Wraps lines to a predefined length.
308 * This utility class wraps lines at word breaks to produce lines that are not
309 * longer than a predefined length. Explicit newlines ('\\n') are preserved.
310 * Only space is considered a word separator. If a single word exceeds the
311 * maximum line length, it is still printed on a single line.
312 * Extra whitespace is stripped from the end of produced lines.
313 * Other options on the wrapping, such as the line length or indentation,
314 * can be changed using a TextLineWrapperSettings object.
316 * Two interfaces to do the wrapping are provided:
317 * -# High-level interface using either wrapToString() (produces a single
318 * string with embedded newlines) or wrapToVector() (produces a vector of
319 * strings with each line as one element).
320 * These methods operate on std::string and wrap the entire input string.
321 * -# Low-level interface using findNextLine() and formatLine().
322 * findNextLine() operates either on a C string or an std::string, and does
323 * not do any memory allocation (so it does not throw). It finds the next
324 * line to be wrapped, considering the wrapping settings.
325 * formatLine() does whitespace operations on the line found by
326 * findNextLine() and returns an std::string.
327 * These methods allow custom wrapping implementation to either avoid
328 * exceptions or to wrap only a part of the input string.
332 gmx::TextLineWrapper wrapper;
333 wrapper.settings().setLineLength(78);
334 printf("%s\n", wrapper.wrapToString(textToWrap).c_str());
339 class TextLineWrapper
343 * Constructs a new line wrapper with default settings.
351 * Constructs a new line wrapper with given settings.
353 * \param[in] settings Wrapping settings.
357 explicit TextLineWrapper(const TextLineWrapperSettings &settings)
358 : settings_(settings)
363 * Provides access to settings of this wrapper.
365 * \returns The settings object for this wrapper.
367 * The returned object can be used to modify settings for the wrapper.
368 * All subsequent calls to wrapToString() and wrapToVector() use the
373 TextLineWrapperSettings &settings() { return settings_; }
376 * Finds the next line to be wrapped.
378 * \param[in] input String to wrap.
379 * \param[in] lineStart Index of first character of the line to find.
380 * \returns Index of first character of the next line.
382 * If this is the last line, returns the length of \p input.
383 * In determining the length of the returned line, this function
384 * considers the maximum line length, leaving space for indentation,
385 * and also whitespace stripping behavior.
386 * Thus, the line returned may be longer than the maximum line length
387 * if it has leading and/or trailing space.
388 * When wrapping a line on a space (not on an explicit line break),
389 * the returned index is always on a non-whitespace character after the
392 * To iterate over lines in a string, use the following code:
394 gmx::TextLineWrapper wrapper;
395 // <set desired wrapping settings>
396 size_t lineStart = 0;
397 size_t length = input.length();
398 while (lineStart < length)
400 size_t nextLineStart = wrapper.findNextLine(input, lineStart);
401 std::string line = wrapper.formatLine(input, lineStart, nextLineStart);
402 // <do something with the line>
403 lineStart = nextLineStart;
410 size_t findNextLine(const char *input, size_t lineStart) const;
411 //! \copydoc findNextLine(const char *, size_t)const
412 size_t findNextLine(const std::string &input, size_t lineStart) const;
414 * Formats a single line for output according to wrapping settings.
416 * \param[in] input Input string.
417 * \param[in] lineStart Index of first character of the line to format.
418 * \param[in] lineEnd Index of first character of the next line.
419 * \returns The line with leading and/or trailing whitespace removed
420 * and indentation applied.
421 * \throws std::bad_alloc if out of memory.
423 * Intended to be used on the lines found by findNextLine().
424 * When used with the lines returned from findNextLine(), the returned
425 * line conforms to the wrapper settings.
426 * Trailing whitespace is always stripped (including any newlines,
427 * i.e., the return value does not contain a newline).
429 std::string formatLine(const std::string &input,
430 size_t lineStart, size_t lineEnd) const;
433 * Formats a string, producing a single string with all the lines.
435 * \param[in] input String to wrap.
436 * \returns \p input with added newlines such that maximum line
437 * length is not exceeded.
438 * \throws std::bad_alloc if out of memory.
440 * Newlines in the input are preserved, including terminal newlines.
441 * Note that if the input does not contain a terminal newline, the
442 * output does not either.
444 std::string wrapToString(const std::string &input) const;
446 * Formats a string, producing a vector with all the lines.
448 * \param[in] input String to wrap.
449 * \returns \p input split into lines such that maximum line length is not exceeded.
451 * \throws std::bad_alloc if out of memory.
453 * The strings in the returned vector do not contain newlines at the
455 * Note that a single terminal newline does not affect the output:
456 * "line\\n" and "line" both produce the same output (but "line\\n\\n"
457 * produces two lines, the second of which is empty).
459 std::vector<std::string> wrapToVector(const std::string &input) const;
462 TextLineWrapperSettings settings_;