2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Implements classes from rstparser.h.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_onlinehelp
44 #include "rstparser.h"
50 #include "gromacs/utility/stringutil.h"
/*! \brief
 * Counts the number of leading spaces in a text range.
 *
 * \param[in] text   Text to inspect.
 * \param[in] start  Index of the first character of the range.
 * \param[in] end    Index one past the last character of the range.
 * \returns   Number of consecutive whitespace characters (as classified by
 *     std::isspace()) at the beginning of `[start, end)`; equals
 *     `end - start` if the whole range is whitespace.
 *
 * The caller must ensure that `end` does not exceed the length of \p text.
 *
 * Does not throw.
 */
int countLeadingSpace(const std::string &text, size_t start, size_t end)
{
    for (size_t i = start; i < end; ++i)
    {
        // <cctype> classifiers require a value representable as
        // unsigned char; passing a plain char that happens to be negative
        // (e.g., a byte of a UTF-8 sequence) is undefined behavior.
        if (!std::isspace(static_cast<unsigned char>(text[i])))
        {
            return static_cast<int>(i - start);
        }
    }
    return static_cast<int>(end - start);
}
/*! \brief
 * Returns `true` if a list item starts in \p text at \p index.
 *
 * Two kinds of list markers are recognized:
 *  - a bullet: `*` followed by whitespace, and
 *  - an enumerator: one or more digits, a `.`, and whitespace.
 *
 * In both cases, at least one character must follow the marker, so a marker
 * at the very end of \p text is not treated as a list item.
 *
 * \param[in] text   Text to inspect.
 * \param[in] index  Position in \p text where the potential item starts.
 *
 * Does not throw.
 */
bool startsListItem(const std::string &text, size_t index)
{
    // Need at least the marker character and one character after it.
    if (text.length() <= index + 1)
    {
        return false;
    }
    // The casts are needed because <cctype> classifiers have undefined
    // behavior for negative plain char values (non-ASCII input).
    if (text[index] == '*'
        && std::isspace(static_cast<unsigned char>(text[index+1])))
    {
        return true;
    }
    if (std::isdigit(static_cast<unsigned char>(text[index])))
    {
        // Skip all the digits of the enumerator.
        while (index < text.length()
               && std::isdigit(static_cast<unsigned char>(text[index])))
        {
            ++index;
        }
        if (text.length() > index + 1 && text[index] == '.'
            && std::isspace(static_cast<unsigned char>(text[index+1])))
        {
            return true;
        }
    }
    return false;
}
/*! \brief
 * Returns `true` if a table starts in \p text at \p index.
 *
 * The function only inspects the first line for something that looks like a
 * reStructuredText table, and accepts also some malformed tables.
 * Any issues should be apparent when Sphinx parses the reStructuredText
 * export, so full validation is not done here.
 *
 * A line starting with `=` and consisting only of `=` and whitespace is
 * treated as the border of a simple table; a line starting with `+` and
 * consisting only of `+` and `-` is treated as the border of a grid table.
 *
 * \param[in] text   Text to inspect.
 * \param[in] index  Position in \p text where the first line of the
 *     potential table starts (after any indentation).
 *
 * Does not throw.
 */
bool startsTable(const std::string &text, size_t index)
{
    // Indexing past the end of the string would be undefined behavior;
    // there cannot be a table there.
    if (index >= text.length())
    {
        return false;
    }
    if (text[index] == '=')
    {
        while (index < text.length() && text[index] != '\n')
        {
            // The cast avoids undefined behavior in std::isspace() for
            // negative plain char values.
            if (text[index] != '='
                && !std::isspace(static_cast<unsigned char>(text[index])))
            {
                return false;
            }
            ++index;
        }
        return true;
    }
    else if (text[index] == '+')
    {
        while (index < text.length() && text[index] != '\n')
        {
            if (text[index] != '-' && text[index] != '+')
            {
                return false;
            }
            ++index;
        }
        return true;
    }
    return false;
}
/*! \brief
 * Returns `true` if a line in \p text starting at \p index is a title underline.
 *
 * A title underline is a line that starts with a punctuation character and
 * consists only of repetitions of that same character up to the end of the
 * line (or the end of \p text).
 *
 * \param[in] text   Text to inspect.
 * \param[in] index  Position in \p text where the line starts.
 *
 * Does not throw.
 */
bool isTitleUnderline(const std::string &text, size_t index)
{
    // Indexing past the end of the string would be undefined behavior;
    // there is no line to inspect there.
    if (index >= text.length())
    {
        return false;
    }
    const char firstChar = text[index];
    // The cast avoids undefined behavior in std::ispunct() for negative
    // plain char values.
    if (std::ispunct(static_cast<unsigned char>(firstChar)))
    {
        while (index < text.length() && text[index] != '\n')
        {
            if (text[index] != firstChar)
            {
                return false;
            }
            ++index;
        }
        return true;
    }
    return false;
}
169 /********************************************************************
170 * RstParagraphIterator
173 RstParagraphIterator::RstParagraphIterator(const std::string &text)
174 : text_(text), begin_(0), end_(0), type_(eParagraphType_Normal),
175 breakSize_(0), firstLineIndent_(0), indent_(0),
176 nextBegin_(0), nextBreakSize_(0), literalIndent_(-1)
180 bool RstParagraphIterator::nextParagraph()
183 type_ = eParagraphType_Normal;
184 breakSize_ = nextBreakSize_;
185 // Skip leading newlines (includes those separating paragraphs).
186 while (begin_ < text_.length() && text_[begin_] == '\n')
190 if (begin_ == text_.length())
197 if (literalIndent_ >= 0)
199 type_ = eParagraphType_Literal;
201 // Loop over lines in input until the end of the current paragraph.
206 const bool bFirstLine = (lineCount == 0);
207 const size_t lineStart = i;
208 const size_t lineEnd = std::min(text_.find('\n', i), text_.length());
209 const int lineIndent = countLeadingSpace(text_, lineStart, lineEnd);
210 const size_t textStart = lineStart + lineIndent;
211 const bool bListItem = startsListItem(text_, textStart);
212 // Return each list item as a separate paragraph to make the behavior
213 // the same always; the item text could even contain multiple
214 // paragraphs, that would anyways produce breaks.
215 if (bListItem && !bFirstLine)
217 // Since there was no empty line in input, do not produce one in
218 // the output, either.
220 // end_ is not updated to break the paragraph before the current line.
223 // Now we will actually use this line as part of this paragraph.
226 // Update indentation.
229 firstLineIndent_ = indent_ = lineIndent;
232 // Find the indentation of the actual text after the
234 int prefixLength = 0;
235 while (!std::isspace(text_[textStart + prefixLength]))
239 while (textStart + prefixLength < text_.length()
240 && std::isspace(text_[textStart + prefixLength]))
244 indent_ += prefixLength;
249 indent_ = std::min(indent_, lineIndent);
251 // We need to check for the title underline before checking for the
252 // paragraph break so that the title is correctly recognized.
253 if (lineCount == 2 && isTitleUnderline(text_, lineStart))
255 type_ = eParagraphType_Title;
257 // Check for end-of-input or an empty line, i.e., a normal paragraph
259 if (lineEnd + 1 >= text_.length() || text_[lineEnd + 1] == '\n')
264 // Always return the title as a separate paragraph, as it requires
265 // different processing.
266 // TODO: This should allow nicer formatting that shares
267 // implementation with writeTitle() and honors the nesting depths etc.,
268 // but that is not implemented.
269 if (type_ == eParagraphType_Title)
271 // If we are here, there was no actual paragraph break, so do not
272 // produce one in the output either.
276 // Next loop starts at the character after the newline.
280 // Check if the next paragraph should be treated as a literal paragraph,
281 // and deal with transformations for the :: marker.
282 if (end_ - begin_ >= 2 && text_.compare(end_ - 2, 2, "::") == 0)
284 literalIndent_ = indent_;
285 // Return the actual literal block if the paragraph was just an "::".
286 if (end_ - begin_ == 2)
288 // Avoid leading whitespace at the beginning; breakSize_ == 0
289 // only for the first paragraph.
294 return nextParagraph();
296 // Remove one of the colons, or both if preceded by whitespace.
297 const bool bRemoveDoubleColon = (text_[end_ - 3] == ' ');
298 end_ -= (bRemoveDoubleColon ? 3 : 1);
304 // Treat a table like a literal block (preserve newlines).
305 if (startsTable(text_, begin_ + firstLineIndent_))
307 type_ = eParagraphType_Literal;
312 void RstParagraphIterator::getParagraphText(std::string *result) const
315 result->reserve(end_ - begin_);
316 result->append(breakSize_, '\n');
317 const bool bPreserveNewlines = (type_ != eParagraphType_Normal);
321 const bool bFirstLine = (i == begin_);
322 const size_t lineStart = i + (bFirstLine ? firstLineIndent_ : indent_);
323 const size_t lineEnd = std::min(text_.find('\n', i), end_);
326 if (bPreserveNewlines)
328 result->push_back('\n');
330 else if (!std::isspace((*result)[result->length() - 1]))
332 result->push_back(' ');
335 result->append(text_, lineStart, lineEnd - lineStart);