src/gromacs/onlinehelp/rstparser.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief
  37  * Implements classes from rstparser.h.
  38  *
  39  * \author Teemu Murtola <teemu.murtola@gmail.com>
  40  * \ingroup module_onlinehelp
  41  */
  42 #include "gmxpre.h"
  43
  44 #include "rstparser.h"
  45
  46 #include <cctype>
  47
  48 #include <algorithm>
  49
  50 #include "gromacs/utility/stringutil.h"
  51
  52 namespace gmx
  53 {
  54
  55 namespace
  56 {
  57
  58 /*! \brief
  59  * Counts the number of leading spaces in a text range.
  60  *
  61  * Does not throw.
  62  */
  63 int countLeadingSpace(const std::string& text, size_t start, size_t end)
  64 {
  65     for (size_t i = start; i < end; ++i)
  66     {
  67         if (!std::isspace(text[i]))
  68         {
  69             return i - start;
  70         }
  71     }
  72     return end - start;
  73 }
  74
  75 /*! \brief
  76  * Returns `true` if a list item starts in \p text at \p index.
  77  *
  78  * Does not throw.
  79  */
  80 bool startsListItem(const std::string& text, size_t index)
  81 {
  82     if (text.length() <= index + 1)
  83     {
  84         return false;
  85     }
  86     if (text[index] == '*' && std::isspace(text[index + 1]))
  87     {
  88         return true;
  89     }
  90     if (std::isdigit(text[index]))
  91     {
  92         while (index < text.length() && std::isdigit(text[index]))
  93         {
  94             ++index;
  95         }
  96         if (text.length() > index + 1 && text[index] == '.' && std::isspace(text[index + 1]))
  97         {
  98             return true;
  99         }
 100     }
 101     return false;
 102 }
 103
 104 /*! \brief
 105  * Returns `true` if a table starts in \p text at \p index.
 106  *
 107  * The function only inspects the first line for something that looks like a
 108  * reStructuredText table, and accepts also some malformed tables.
 109  * Any issues should be apparent when Sphinx parses the reStructuredText
 110  * export, so full validation is not done here.
 111  *
 112  * Does not throw.
 113  */
 114 bool startsTable(const std::string& text, size_t index)
 115 {
 116     if (text[index] == '=')
 117     {
 118         while (index < text.length() && text[index] != '\n')
 119         {
 120             if (text[index] != '=' && !std::isspace(text[index]))
 121             {
 122                 return false;
 123             }
 124             ++index;
 125         }
 126         return true;
 127     }
 128     else if (text[index] == '+')
 129     {
 130         while (index < text.length() && text[index] != '\n')
 131         {
 132             if (text[index] != '-' && text[index] != '+')
 133             {
 134                 return false;
 135             }
 136             ++index;
 137         }
 138         return true;
 139     }
 140     return false;
 141 }
 142
 143 /*! \brief
 144  * Returns `true` if a line in \p text starting at \p index is a title underline.
 145  *
 146  * Does not throw.
 147  */
 148 bool isTitleUnderline(const std::string& text, size_t index)
 149 {
 150     const char firstChar = text[index];
 151     if (std::ispunct(firstChar))
 152     {
 153         while (index < text.length() && text[index] != '\n')
 154         {
 155             if (text[index] != firstChar)
 156             {
 157                 return false;
 158             }
 159             ++index;
 160         }
 161         return true;
 162     }
 163     return false;
 164 }
 165
 166 } // namespace
 167
 168 /********************************************************************
 169  * RstParagraphIterator
 170  */
 171
// Sets up iteration over the paragraphs of \p text.
//
// All positions and sizes start at zero, so the first call to
// nextParagraph() begins scanning at the start of the text and produces no
// leading line breaks (nextBreakSize_ == 0).
// NOTE(review): whether text_ stores a copy of or a reference to \p text is
// determined by its declaration in rstparser.h, which is not visible here.
RstParagraphIterator::RstParagraphIterator(const std::string& text) :
    text_(text),
    begin_(0),
    end_(0),
    type_(eParagraphType_Normal),
    breakSize_(0),
    firstLineIndent_(0),
    indent_(0),
    nextBegin_(0),
    nextBreakSize_(0),
    // A negative value means that the next paragraph is not a literal block
    // (nextParagraph() tests for literalIndent_ >= 0).
    literalIndent_(-1)
{
}
 185
 186 bool RstParagraphIterator::nextParagraph()
 187 {
 188     begin_     = nextBegin_;
 189     type_      = eParagraphType_Normal;
 190     breakSize_ = nextBreakSize_;
 191     // Skip leading newlines (includes those separating paragraphs).
 192     while (begin_ < text_.length() && text_[begin_] == '\n')
 193     {
 194         ++begin_;
 195     }
 196     if (begin_ == text_.length())
 197     {
 198         end_       = begin_;
 199         breakSize_ = 0;
 200         nextBegin_ = begin_;
 201         return false;
 202     }
 203     if (literalIndent_ >= 0)
 204     {
 205         type_ = eParagraphType_Literal;
 206     }
 207     // Loop over lines in input until the end of the current paragraph.
 208     size_t i         = begin_;
 209     int    lineCount = 0;
 210     while (true)
 211     {
 212         const bool   bFirstLine = (lineCount == 0);
 213         const size_t lineStart  = i;
 214         const size_t lineEnd    = std::min(text_.find('\n', i), text_.length());
 215         const int    lineIndent = countLeadingSpace(text_, lineStart, lineEnd);
 216         const size_t textStart  = lineStart + lineIndent;
 217         const bool   bListItem  = startsListItem(text_, textStart);
 218         // Return each list item as a separate paragraph to make the behavior
 219         // the same always; the item text could even contain multiple
 220         // paragraphs, that would anyways produce breaks.
 221         if (bListItem && !bFirstLine)
 222         {
 223             // Since there was no empty line in input, do not produce one in
 224             // the output, either.
 225             nextBreakSize_ = 1;
 226             // end_ is not updated to break the paragraph before the current line.
 227             break;
 228         }
 229         // Now we will actually use this line as part of this paragraph.
 230         end_ = lineEnd;
 231         ++lineCount;
 232         // Update indentation.
 233         if (bFirstLine)
 234         {
 235             firstLineIndent_ = indent_ = lineIndent;
 236             if (bListItem)
 237             {
 238                 // Find the indentation of the actual text after the
 239                 // bullet/number.
 240                 int prefixLength = 0;
 241                 while (!std::isspace(text_[textStart + prefixLength]))
 242                 {
 243                     ++prefixLength;
 244                 }
 245                 while (textStart + prefixLength < text_.length()
 246                        && std::isspace(text_[textStart + prefixLength]))
 247                 {
 248                     ++prefixLength;
 249                 }
 250                 indent_ += prefixLength;
 251             }
 252         }
 253         else
 254         {
 255             indent_ = std::min(indent_, lineIndent);
 256         }
 257         // We need to check for the title underline before checking for the
 258         // paragraph break so that the title is correctly recognized.
 259         if (lineCount == 2 && isTitleUnderline(text_, lineStart))
 260         {
 261             type_ = eParagraphType_Title;
 262         }
 263         // Check for end-of-input or an empty line, i.e., a normal paragraph
 264         // break.
 265         if (lineEnd + 1 >= text_.length() || text_[lineEnd + 1] == '\n')
 266         {
 267             nextBreakSize_ = 2;
 268             break;
 269         }
 270         // Always return the title as a separate paragraph, as it requires
 271         // different processing.
 272         // TODO: This should allow nicer formatting that shares
 273         // implementation with writeTitle() and honors the nesting depths etc.,
 274         // but that is not implemented.
 275         if (type_ == eParagraphType_Title)
 276         {
 277             // If we are here, there was no actual paragraph break, so do not
 278             // produce one in the output either.
 279             nextBreakSize_ = 1;
 280             break;
 281         }
 282         // Next loop starts at the character after the newline.
 283         i = lineEnd + 1;
 284     }
 285     nextBegin_ = end_;
 286     // Check if the next paragraph should be treated as a literal paragraph,
 287     // and deal with transformations for the :: marker.
 288     if (end_ - begin_ >= 2 && text_.compare(end_ - 2, 2, "::") == 0)
 289     {
 290         literalIndent_ = indent_;
 291         // Return the actual literal block if the paragraph was just an "::".
 292         if (end_ - begin_ == 2)
 293         {
 294             // Avoid leading whitespace at the beginning; breakSize_ == 0
 295             // only for the first paragraph.
 296             if (breakSize_ == 0)
 297             {
 298                 nextBreakSize_ = 0;
 299             }
 300             return nextParagraph();
 301         }
 302         // Remove one of the colons, or both if preceded by whitespace.
 303         const bool bRemoveDoubleColon = (text_[end_ - 3] == ' ');
 304         end_ -= (bRemoveDoubleColon ? 3 : 1);
 305     }
 306     else
 307     {
 308         literalIndent_ = -1;
 309     }
 310     // Treat a table like a literal block (preserve newlines).
 311     if (startsTable(text_, begin_ + firstLineIndent_))
 312     {
 313         type_ = eParagraphType_Literal;
 314     }
 315     return true;
 316 }
 317
 318 void RstParagraphIterator::getParagraphText(std::string* result) const
 319 {
 320     result->clear();
 321     result->reserve(end_ - begin_);
 322     result->append(breakSize_, '\n');
 323     const bool bPreserveNewlines = (type_ != eParagraphType_Normal);
 324     size_t     i                 = begin_;
 325     while (i < end_)
 326     {
 327         const bool   bFirstLine = (i == begin_);
 328         const size_t lineStart  = i + (bFirstLine ? firstLineIndent_ : indent_);
 329         const size_t lineEnd    = std::min(text_.find('\n', i), end_);
 330         if (!bFirstLine)
 331         {
 332             if (bPreserveNewlines)
 333             {
 334                 result->push_back('\n');
 335             }
 336             else if (!std::isspace((*result)[result->length() - 1]))
 337             {
 338                 result->push_back(' ');
 339             }
 340         }
 341         result->append(text_, lineStart, lineEnd - lineStart);
 342         i = lineEnd + 1;
 343     }
 344 }
 345
 346 } // namespace gmx