src/gromacs/onlinehelp/rstparser.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2015, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief
  37  * Implements classes from rstparser.h.
  38  *
  39  * \author Teemu Murtola <teemu.murtola@gmail.com>
  40  * \ingroup module_onlinehelp
  41  */
  42 #include "gmxpre.h"
  43
  44 #include "rstparser.h"
  45
  46 #include <cctype>
  47
  48 #include <algorithm>
  49
  50 #include "gromacs/utility/stringutil.h"
  51
  52 namespace gmx
  53 {
  54
  55 namespace
  56 {
  57
  58 /*! \brief
  59  * Counts the number of leading spaces in a text range.
  60  *
  61  * Does not throw.
  62  */
  63 int countLeadingSpace(const std::string &text, size_t start, size_t end)
  64 {
  65     for (size_t i = start; i < end; ++i)
  66     {
  67         if (!std::isspace(text[i]))
  68         {
  69             return i - start;
  70         }
  71     }
  72     return end - start;
  73 }
  74
  75 /*! \brief
  76  * Returns `true` if a list item starts in \p text at \p index.
  77  *
  78  * Does not throw.
  79  */
  80 bool startsListItem(const std::string &text, size_t index)
  81 {
  82     if (text.length() <= index + 1)
  83     {
  84         return false;
  85     }
  86     if (text[index] == '*' && std::isspace(text[index+1]))
  87     {
  88         return true;
  89     }
  90     if (std::isdigit(text[index]))
  91     {
  92         while (index < text.length() && std::isdigit(text[index]))
  93         {
  94             ++index;
  95         }
  96         if (text.length() > index + 1 && text[index] == '.'
  97             && std::isspace(text[index+1]))
  98         {
  99             return true;
 100         }
 101     }
 102     return false;
 103 }
 104
 105 /*! \brief
 106  * Returns `true` if a table starts in \p text at \p index.
 107  *
 108  * The function only inspects the first line for something that looks like a
 109  * reStructuredText table, and accepts also some malformed tables.
 110  * Any issues should be apparent when Sphinx parses the reStructuredText
 111  * export, so full validation is not done here.
 112  *
 113  * Does not throw.
 114  */
 115 bool startsTable(const std::string &text, size_t index)
 116 {
 117     if (text[index] == '=')
 118     {
 119         while (index < text.length() && text[index] != '\n')
 120         {
 121             if (text[index] != '=' && !std::isspace(text[index]))
 122             {
 123                 return false;
 124             }
 125             ++index;
 126         }
 127         return true;
 128     }
 129     else if (text[index] == '+')
 130     {
 131         while (index < text.length() && text[index] != '\n')
 132         {
 133             if (text[index] != '-' && text[index] != '+')
 134             {
 135                 return false;
 136             }
 137             ++index;
 138         }
 139         return true;
 140     }
 141     return false;
 142 }
 143
 144 /*! \brief
 145  * Returns `true` if a line in \p text starting at \p index is a title underline.
 146  *
 147  * Does not throw.
 148  */
 149 bool isTitleUnderline(const std::string &text, size_t index)
 150 {
 151     const char firstChar = text[index];
 152     if (std::ispunct(firstChar))
 153     {
 154         while (index < text.length() && text[index] != '\n')
 155         {
 156             if (text[index] != firstChar)
 157             {
 158                 return false;
 159             }
 160             ++index;
 161         }
 162         return true;
 163     }
 164     return false;
 165 }
 166
 167 }    // namespace
 168
 169 /********************************************************************
 170  * RstParagraphIterator
 171  */
 172
 173 RstParagraphIterator::RstParagraphIterator(const std::string &text)
 174     : text_(text), begin_(0), end_(0), type_(eParagraphType_Normal),
 175       breakSize_(0), firstLineIndent_(0), indent_(0),
 176       nextBegin_(0), nextBreakSize_(0), literalIndent_(-1)
 177 {
 178 }
 179
 180 bool RstParagraphIterator::nextParagraph()
 181 {
 182     begin_     = nextBegin_;
 183     type_      = eParagraphType_Normal;
 184     breakSize_ = nextBreakSize_;
 185     // Skip leading newlines (includes those separating paragraphs).
 186     while (begin_ < text_.length() && text_[begin_] == '\n')
 187     {
 188         ++begin_;
 189     }
 190     if (begin_ == text_.length())
 191     {
 192         end_       = begin_;
 193         breakSize_ = 0;
 194         nextBegin_ = begin_;
 195         return false;
 196     }
 197     if (literalIndent_ >= 0)
 198     {
 199         type_ = eParagraphType_Literal;
 200     }
 201     // Loop over lines in input until the end of the current paragraph.
 202     size_t i         = begin_;
 203     int    lineCount = 0;
 204     while (true)
 205     {
 206         const bool   bFirstLine = (lineCount == 0);
 207         const size_t lineStart  = i;
 208         const size_t lineEnd    = std::min(text_.find('\n', i), text_.length());
 209         const int    lineIndent = countLeadingSpace(text_, lineStart, lineEnd);
 210         const size_t textStart  = lineStart + lineIndent;
 211         const bool   bListItem  = startsListItem(text_, textStart);
 212         // Return each list item as a separate paragraph to make the behavior
 213         // the same always; the item text could even contain multiple
 214         // paragraphs, that would anyways produce breaks.
 215         if (bListItem && !bFirstLine)
 216         {
 217             // Since there was no empty line in input, do not produce one in
 218             // the output, either.
 219             nextBreakSize_ = 1;
 220             // end_ is not updated to break the paragraph before the current line.
 221             break;
 222         }
 223         // Now we will actually use this line as part of this paragraph.
 224         end_ = lineEnd;
 225         ++lineCount;
 226         // Update indentation.
 227         if (bFirstLine)
 228         {
 229             firstLineIndent_ = indent_ = lineIndent;
 230             if (bListItem)
 231             {
 232                 // Find the indentation of the actual text after the
 233                 // bullet/number.
 234                 int prefixLength = 0;
 235                 while (!std::isspace(text_[textStart + prefixLength]))
 236                 {
 237                     ++prefixLength;
 238                 }
 239                 while (textStart + prefixLength < text_.length()
 240                        && std::isspace(text_[textStart + prefixLength]))
 241                 {
 242                     ++prefixLength;
 243                 }
 244                 indent_ += prefixLength;
 245             }
 246         }
 247         else
 248         {
 249             indent_ = std::min(indent_, lineIndent);
 250         }
 251         // We need to check for the title underline before checking for the
 252         // paragraph break so that the title is correctly recognized.
 253         if (lineCount == 2 && isTitleUnderline(text_, lineStart))
 254         {
 255             type_ = eParagraphType_Title;
 256         }
 257         // Check for end-of-input or an empty line, i.e., a normal paragraph
 258         // break.
 259         if (lineEnd + 1 >= text_.length() || text_[lineEnd + 1] == '\n')
 260         {
 261             nextBreakSize_ = 2;
 262             break;
 263         }
 264         // Always return the title as a separate paragraph, as it requires
 265         // different processing.
 266         // TODO: This should allow nicer formatting that shares
 267         // implementation with writeTitle() and honors the nesting depths etc.,
 268         // but that is not implemented.
 269         if (type_ == eParagraphType_Title)
 270         {
 271             // If we are here, there was no actual paragraph break, so do not
 272             // produce one in the output either.
 273             nextBreakSize_ = 1;
 274             break;
 275         }
 276         // Next loop starts at the character after the newline.
 277         i = lineEnd + 1;
 278     }
 279     nextBegin_ = end_;
 280     // Check if the next paragraph should be treated as a literal paragraph,
 281     // and deal with transformations for the :: marker.
 282     if (end_ - begin_ >= 2 && text_.compare(end_ - 2, 2, "::") == 0)
 283     {
 284         literalIndent_ = indent_;
 285         // Return the actual literal block if the paragraph was just an "::".
 286         if (end_ - begin_ == 2)
 287         {
 288             // Avoid leading whitespace at the beginning; breakSize_ == 0
 289             // only for the first paragraph.
 290             if (breakSize_ == 0)
 291             {
 292                 nextBreakSize_ = 0;
 293             }
 294             return nextParagraph();
 295         }
 296         // Remove one of the colons, or both if preceded by whitespace.
 297         const bool bRemoveDoubleColon = (text_[end_ - 3] == ' ');
 298         end_ -= (bRemoveDoubleColon ? 3 : 1);
 299     }
 300     else
 301     {
 302         literalIndent_ = -1;
 303     }
 304     // Treat a table like a literal block (preserve newlines).
 305     if (startsTable(text_, begin_ + firstLineIndent_))
 306     {
 307         type_ = eParagraphType_Literal;
 308     }
 309     return true;
 310 }
 311
 312 void RstParagraphIterator::getParagraphText(std::string *result) const
 313 {
 314     result->clear();
 315     result->reserve(end_ - begin_);
 316     result->append(breakSize_, '\n');
 317     const bool bPreserveNewlines = (type_ != eParagraphType_Normal);
 318     size_t     i                 = begin_;
 319     while (i < end_)
 320     {
 321         const bool   bFirstLine = (i == begin_);
 322         const size_t lineStart  = i + (bFirstLine ? firstLineIndent_ : indent_);
 323         const size_t lineEnd    = std::min(text_.find('\n', i), end_);
 324         if (!bFirstLine)
 325         {
 326             if (bPreserveNewlines)
 327             {
 328                 result->push_back('\n');
 329             }
 330             else if (!std::isspace((*result)[result->length() - 1]))
 331             {
 332                 result->push_back(' ');
 333             }
 334         }
 335         result->append(text_, lineStart, lineEnd - lineStart);
 336         i = lineEnd + 1;
 337     }
 338 }
 339
 340 } // namespace gmx