Better automated Doxygen documentation checker
author Teemu Murtola <teemu.murtola@gmail.com>
Mon, 3 Mar 2014 19:06:37 +0000 (21:06 +0200)
committer Teemu Murtola <teemu.murtola@gmail.com>
Thu, 27 Mar 2014 18:04:49 +0000 (20:04 +0200)
Add a completely rewritten checker to replace 'make doccheck'.
This is based on first generating and then parsing the Doxygen XML
output.  It is somewhat slower than the old approach, but more robust
(exact format of the comments no longer matters) and allows relatively
easily checking all the documentation.  As a side effect, the XML
extraction also makes Doxygen parse those comments that appear in
undocumented files, so that at least syntax errors in them can be
spotted from warnings (the checker can also check them otherwise, and
even warn about them getting ignored).

The new check can be run with 'make doc-check'.  Not all functionality
from the old checker is available; documentation of the new checks in
doxygen.md follows when that is done, as well as some additional cleanup
of the code.

Currently, the new checker produces some warnings, which are suppressed
using suppressions.txt.

Short-term, this could be used to check for most common issues like
missing brief descriptions.

Medium-term, we could implement our own logic for enforcing what needs
to be documented, and disable all Doxygen-provided warnings about
undocumented members (by turning HIDE_UNDOC_MEMBERS=ON).  This would
remove the need for most \cond directives.

Long-term, we could even consider generating some extra member listings
or such from the XML output, and/or use it to patch the HTML pages
generated by Doxygen such that they would be more useful for our uses.

Change-Id: I25262b28699e10547e2116a50d3a9d8d5aa966a6

12 files changed:
doxygen/CMakeLists.txt
doxygen/Doxyfile-full.cmakein
doxygen/Doxyfile-lib.cmakein
doxygen/Doxyfile-user.cmakein
doxygen/Doxyfile-xml.cmakein [new file with mode: 0644]
doxygen/doxygen-check.py [new file with mode: 0755]
doxygen/doxygen.md
doxygen/doxygenxml.py [new file with mode: 0755]
doxygen/getInstalledHeaders.cmake [new file with mode: 0644]
doxygen/gmxtree.py [new file with mode: 0644]
doxygen/reporter.py [new file with mode: 0644]
doxygen/suppressions.txt [new file with mode: 0644]

index cf395c3a42092e9f60afa41137bc5ba43ac2e4e8..ee8e0992ba88ae0bdc80e16f0a22becce918c89d 100644 (file)
@@ -55,6 +55,20 @@ gmx_dependent_option(
     DOXYGEN_FOUND)
 mark_as_advanced(GMX_COMPACT_DOXYGEN)
 
+# Helper Python scripts are only enabled when a Python interpreter of
+# version 2.6 or newer is found; USE_PYTHON_SCRIPTS records the result.
+find_package(PythonInterp)
+set(USE_PYTHON_SCRIPTS OFF)
+if (PYTHONINTERP_FOUND AND NOT PYTHON_VERSION_STRING VERSION_LESS "2.6")
+    set(USE_PYTHON_SCRIPTS ON)
+endif()
+
+# Runs getInstalledHeaders.cmake in script mode with the source and build tree
+# roots; the script is expected to write the list of installed headers to
+# OUTFILE (installed-headers.txt in the current binary directory).
+add_custom_target(find-installed-headers
+    COMMAND ${CMAKE_COMMAND}
+        -D SRCDIR=${CMAKE_SOURCE_DIR}
+        -D BUILDDIR=${CMAKE_BINARY_DIR}
+        -D OUTFILE=${CMAKE_CURRENT_BINARY_DIR}/installed-headers.txt
+        -P ${CMAKE_CURRENT_SOURCE_DIR}/getInstalledHeaders.cmake
+    COMMENT "Generating installed headers list" VERBATIM)
+
 ########################################################################
 # Doxygen configuration
 ########################################################################
@@ -72,6 +86,7 @@ if (DOXYGEN_FOUND)
     CONFIGURE_FILE(Doxyfile-full.cmakein Doxyfile-full)
     CONFIGURE_FILE(Doxyfile-lib.cmakein Doxyfile-lib)
     CONFIGURE_FILE(Doxyfile-user.cmakein Doxyfile-user)
+    CONFIGURE_FILE(Doxyfile-xml.cmakein Doxyfile-xml)
 
     if (GMX_COMPACT_DOXYGEN)
         FILE(APPEND ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile-common
@@ -92,6 +107,10 @@ if (DOXYGEN_FOUND)
         ${CMAKE_COMMAND} -DDOCTYPE=user -P RunDoxygen.cmake
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
         COMMENT "Generating public API documentation with Doxygen" VERBATIM)
+    add_custom_target(doc-xml
+        ${CMAKE_COMMAND} -DDOCTYPE=xml -P RunDoxygen.cmake
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+        COMMENT "Extracting Doxygen documentation to XML" VERBATIM)
     add_custom_target(doc-all)
     add_dependencies(doc-all doc-full doc-lib doc-user)
 
@@ -111,15 +130,31 @@ if (DOXYGEN_FOUND)
         add_dependencies(doc-full doxygen-version)
         add_dependencies(doc-lib doxygen-version)
         add_dependencies(doc-user doxygen-version)
+        add_dependencies(doc-xml doxygen-version)
     else()
         set(GMX_PROJECT_VERSION_STR ${PROJECT_VERSION})
         configure_file(Doxyfile-version.cmakein Doxyfile-version)
     endif()
-endif()
 
-find_package(PythonInterp)
+    if (USE_PYTHON_SCRIPTS)
+        # Command line shared by the doc-check and doc-check-fast targets:
+        # runs doxygen-check.py on the source and build trees, using the
+        # installed headers list and the suppressions file, and logs issues
+        # to doxygen-check.log in the current binary directory.
+        set(doc_check_command
+            ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/doxygen-check.py
+            -S ${CMAKE_SOURCE_DIR} -B ${CMAKE_BINARY_DIR}
+            --installed ${CMAKE_CURRENT_BINARY_DIR}/installed-headers.txt
+            -l ${CMAKE_CURRENT_BINARY_DIR}/doxygen-check.log
+            --ignore ${CMAKE_CURRENT_SOURCE_DIR}/suppressions.txt)
+        add_custom_target(doc-check COMMAND ${doc_check_command}
+            COMMENT "Checking Doxygen documentation" VERBATIM)
+        # doc-check first regenerates the Doxygen XML and the installed
+        # headers list.
+        add_dependencies(doc-check doc-xml find-installed-headers)
+        # This target is the same as doc-check, but it doesn't rerun the
+        # dependencies each time, making it faster and more convenient for
+        # testing.
+        add_custom_target(doc-check-fast COMMAND ${doc_check_command}
+            COMMENT "Checking Doxygen documentation" VERBATIM)
+    endif()
+endif()
 
-if (PYTHONINTERP_FOUND AND NOT PYTHON_VERSION_STRING VERSION_LESS "2.6")
+if (USE_PYTHON_SCRIPTS)
     add_custom_target(depgraphs
         ${CMAKE_COMMAND}
         -DSRCDIR=${CMAKE_SOURCE_DIR}
index 6b447d68592c00c507043e8e43a9fb071f9300ec..589ef5d87cde06cd932f5ed3862e2989a88b13d3 100644 (file)
@@ -15,4 +15,3 @@ INLINE_INHERITED_MEMB  = NO  # Makes it easier to go through all documentation
 ALIASES               += inpublicapi="\ingroup group_publicapi"
 ALIASES               += inlibraryapi="\ingroup group_libraryapi"
 ALIASES               += libinternal=
-ALIASES               += endlibinternal=
index 497e358c1065df41bc372c52c06555893eb03d09..b2383e9d937b78b086baec1c7cd49c33acdaef50 100644 (file)
@@ -9,4 +9,3 @@ HTML_OUTPUT            = html-lib
 ALIASES               += inpublicapi="\ingroup group_publicapi"
 ALIASES               += inlibraryapi="\ingroup group_libraryapi"
 ALIASES               += libinternal=
-ALIASES               += endlibinternal=
index d2acd5fc5ab106589b352d04963d3bfd84630d0c..ba03b00e09b8427040447bfba6c58243be8ba737 100644 (file)
@@ -16,4 +16,3 @@ HTML_OUTPUT            = html-user
 ALIASES               += inpublicapi="\ingroup group_publicapi"
 ALIASES               += inlibraryapi="\ingroup group_libraryapi"
 ALIASES               += libinternal="\internal"
-ALIASES               += endlibinternal="\endinternal"
diff --git a/doxygen/Doxyfile-xml.cmakein b/doxygen/Doxyfile-xml.cmakein
new file mode 100644 (file)
index 0000000..babca03
--- /dev/null
@@ -0,0 +1,34 @@
+@INCLUDE               = Doxyfile-common
+
+PREDEFINED            += F77_FUNC(name,NAME)=name
+
+ENABLED_SECTIONS      += libapi internal
+INTERNAL_DOCS          = YES
+EXTRACT_LOCAL_CLASSES  = YES
+EXTRACT_ANON_NSPACES   = YES
+
+# We don't need the STL classes for our use of the XML,
+# and they generate broken references that would need extra handling
+BUILTIN_STL_SUPPORT    = NO
+WARN_IF_UNDOCUMENTED   = NO
+WARN_LOGFILE           = doxygen-xml.log
+CLASS_DIAGRAMS         = NO
+CLASS_GRAPH            = NO
+COLLABORATION_GRAPH    = NO
+DIRECTORY_GRAPH        = NO
+INCLUDE_GRAPH          = NO
+INCLUDED_BY_GRAPH      = NO
+
+INLINE_INHERITED_MEMB  = NO  # Do not duplicate documentation
+
+GENERATE_HTML          = NO
+GENERATE_XML           = YES
+XML_PROGRAMLISTING     = NO
+
+ALIASES               += inpublicapi="\ingroup group_publicapi"
+ALIASES               += inlibraryapi="\ingroup group_libraryapi"
+ALIASES               += libinternal="\xmlonly <libinternal /> \endxmlonly"
+
+# This is here because some comments that don't produce any useful
+# documentation still contain invalid Doxygen commands...
+ALIASES               += threadsafe=
diff --git a/doxygen/doxygen-check.py b/doxygen/doxygen-check.py
new file mode 100755 (executable)
index 0000000..c17224a
--- /dev/null
@@ -0,0 +1,201 @@
+#!/usr/bin/python
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2014, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+"""Check Doxygen documentation for issues that Doxygen does not warn about.
+
+This script checks for some issues in the Doxygen documentation, using Doxygen
+XML output.  Part of the checks are generic, like checking that all documented
+entities have brief descriptions.  Others are specific to GROMACS, like checking
+that only installed headers contribute to the public API documentation.
+
+The checks should be self-evident from the source code of the script.
+All the logic of parsing the Doxygen XML output and creating a GROMACS-specific
+representation of the source tree is separated into separate Python modules
+(doxygenxml.py and gmxtree.py, respectively).  Similarly, logic for handling
+the output messages is in reporter.py.   This leaves only the actual checks and
+the script command-line interface in this file.
+
+The script can be run using the 'doc-check' target generated by CMake.
+This target takes care of generating all the necessary input files and passing
+them to the script.
+"""
+
+import sys
+from optparse import OptionParser
+
+from gmxtree import GromacsTree, DocType
+from reporter import Reporter
+
+def check_file(fileobj, reporter):
+    """Check file-level documentation.
+
+    Undocumented files are skipped.  For documented files, every issue found
+    is reported through reporter.file_error().
+    """
+    if not fileobj.is_documented():
+        # TODO: Add rules for required documentation
+        return
+
+    # Consistency between the kind of file (source, test, installed header,
+    # other header) and the visibility of its documentation.  Only the first
+    # matching branch of the outer chain is evaluated.
+    if fileobj.is_source_file():
+        # TODO: Add rule to exclude examples from this check
+        if fileobj.is_installed():
+            reporter.file_error(fileobj, "source file is installed")
+        if fileobj.get_documentation_type() != DocType.internal:
+            reporter.file_error(fileobj,
+                    "source file documentation appears outside full documentation")
+        elif fileobj.get_api_type() != DocType.internal:
+            reporter.file_error(fileobj, "source file marked as non-internal")
+    elif fileobj.is_test_file() and fileobj.is_installed():
+        reporter.file_error(fileobj, "test file is installed")
+    elif fileobj.is_installed():
+        if fileobj.get_documentation_type() != DocType.public:
+            reporter.file_error(fileobj,
+                    "public header has non-public documentation")
+    elif fileobj.get_documentation_type() == DocType.public:
+        reporter.file_error(fileobj,
+                "non-installed header has public documentation")
+    elif fileobj.get_api_type() == DocType.public:
+        reporter.file_error(fileobj,
+                "non-installed header specified as part of public API")
+    elif fileobj.get_documentation_type() < fileobj.get_api_type():
+        reporter.file_error(fileobj,
+                "API type ({0}) conflicts with documentation visibility ({1})"
+                .format(fileobj.get_api_type(), fileobj.get_documentation_type()))
+
+    if not fileobj.has_brief_description():
+        reporter.file_error(fileobj,
+                "is documented, but does not have brief description")
+
+    # Check that the file is documented as part of the module it is expected
+    # to belong to (when such an expected module is known).
+    expectedmod = fileobj.get_expected_module()
+    if expectedmod:
+        docmodules = fileobj.get_doc_modules()
+        if docmodules:
+            for module in docmodules:
+                if module != expectedmod:
+                    reporter.file_error(fileobj,
+                            "is documented in incorrect module: {0}"
+                            .format(module.get_name()))
+        elif expectedmod.is_documented():
+            reporter.file_error(fileobj,
+                    "is not documented in any module, but {0} exists"
+                    .format(expectedmod.get_name()))
+
+def check_entity(entity, reporter):
+    """Check documentation for a code construct."""
+    if entity.is_documented():
+        if not entity.has_brief_description():
+            reporter.doc_error(entity,
+                    "is documented, but does not have brief description")
+
+def check_class(classobj, reporter):
+    """Check documentation for a class/struct/union."""
+    check_entity(classobj, reporter)
+    if classobj.is_documented():
+        classtype = classobj.get_documentation_type()
+        filetype = classobj.get_file_documentation_type()
+        if classtype == DocType.public and not classobj.is_in_installed_file():
+            reporter.doc_error(classobj,
+                    "has public documentation, but is not in installed header")
+        elif filetype is not DocType.none and classtype > filetype:
+            reporter.doc_error(classobj,
+                    "is in {0} file(s), but appears in {1} documentation"
+                    .format(filetype, classtype))
+
+def check_member(member, reporter):
+    """Check documentation for a generic member."""
+    check_entity(member, reporter)
+    if member.is_documented():
+        if not member.is_visible():
+            # TODO: This is triggered by members in anonymous namespaces.
+            reporter.doc_note(member,
+                    "is documented, but is ignored by Doxygen, because its scope is not documented")
+        if member.has_inbody_description():
+            reporter.doc_note(member, "has in-body comments, which are ignored")
+
+def main():
+    """Run the checking script."""
+    parser = OptionParser()
+    parser.add_option('-S', '--source-root',
+                      help='Source tree root directory')
+    parser.add_option('-B', '--build-root',
+                      help='Build tree root directory')
+    parser.add_option('--installed',
+                      help='Read list of installed files from given file')
+    parser.add_option('-l', '--log',
+                      help='Write issues into a given log file in addition to stderr')
+    parser.add_option('--ignore',
+                      help='Set file with patterns for messages to ignore')
+    parser.add_option('--check-ignored', action='store_true',
+                      help='Check documentation ignored by Doxygen')
+    parser.add_option('-q', '--quiet', action='store_true',
+                      help='Do not write status messages')
+    options, args = parser.parse_args()
+
+    installedlist = []
+    if options.installed:
+        with open(options.installed, 'r') as outfile:
+            for line in outfile:
+                installedlist.append(line.strip())
+
+    reporter = Reporter(options.log)
+    if options.ignore:
+        reporter.load_filters(options.ignore)
+
+    if not options.quiet:
+        sys.stderr.write('Scanning source tree...\n')
+    tree = GromacsTree(options.source_root, options.build_root, reporter)
+    tree.set_installed_file_list(installedlist)
+    if not options.quiet:
+        sys.stderr.write('Reading Doxygen XML files...\n')
+    tree.load_xml()
+
+    reporter.write_pending()
+
+    if not options.quiet:
+        sys.stderr.write('Checking...\n')
+
+    for fileobj in tree.get_files():
+        check_file(fileobj, reporter)
+
+    for classobj in tree.get_classes():
+        check_class(classobj, reporter)
+
+    for memberobj in tree.get_members():
+        if memberobj.is_visible() or options.check_ignored:
+            check_member(memberobj, reporter)
+
+    # TODO: Check #include statements, like old 'make doccheck'
+
+    reporter.write_pending()
+    reporter.report_unused_filters()
+    reporter.close_log()
+
+main()
index 74dbcc9a7f811fb3ba88e6476e5e0478b60e5d41..b759e7310e971238ab1acbfac0fb6489dfa2d344 100644 (file)
@@ -189,14 +189,16 @@ different mechanisms are used:
   declarations local to source files only in the full documentation.
   You can find the details from the `Doxyfile-*.cmakein` files, and some of
   them are also mentioned below on individual code constructs.
-* The standard Doxygen commands \c \\internal and \c \\endinternal mark the
-  documentation to be only extracted into the full documentation
-  (`INTERNAL_DOCS` is `ON` only for the full documentation).
-  In addition, \Gromacs specific custom Doxygen commands \c \\libinternal and
-  \c \\endlibinternal are provided, which only exclude the documentation from
-  the public API documentation.  These are implemented by expanding the
-  commands to either \c \\internal or to a no-op, depending on the
-  documentation level.
+* The standard Doxygen command \c \\internal marks the documentation to be only
+  extracted into the full documentation (`INTERNAL_DOCS` is `ON` only for the
+  full documentation).  This should be used as a first command in a comment
+  block to exclude all the documentation.  It is possible to use \c \\internal
+  and \c \\endinternal to exclude individual paragraphs, but \c \\if `internal`
+  is preferred (see below).
+  In addition, a \Gromacs-specific custom Doxygen command \c \\libinternal is
+  provided, which should be used the same way to exclude the documentation from
+  the public API documentation.  This command expands to either \c \\internal
+  or to a no-op, depending on the documentation level.
 * Doxygen commands \c \\if and \c \\cond can be used with section names
   `libapi` and `internal` to only include the documentation in library API and
   the full documentation, respectively.  `libapi` is also defined in the full
diff --git a/doxygen/doxygenxml.py b/doxygen/doxygenxml.py
new file mode 100755 (executable)
index 0000000..b63ad36
--- /dev/null
@@ -0,0 +1,1251 @@
+#!/usr/bin/python
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2014, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+"""Doxygen XML output parser.
+
+This module implements a parser for the Doxygen XML output, converting it into
+an object model that can be used to navigate the documentation.  It also uses
+knowledge from how Doxygen works to provide access to things like visibility of
+individual member documentation (e.g., based on what is the visibility of its
+parent compound objects).
+
+The object model is rooted at a DocumentationSet object.  Each documented
+entity is modeled as an Entity, and this has subclasses Member and Compound to
+correspond to the two categories of items that Doxygen handles.  These classes
+are further subclassed to match each kind of entity that Doxygen produces.
+Only kinds produced by Doxygen from C/C++ code are modeled.  Everything else
+is ignored after a warning.
+
+Currently the member entities are not completely parsed from the XML files, and
+the interface may need additional work to provide convenient access to all
+member types and their common properties.  For now, focus is in modeling the
+compound entities.
+
+The implementation is mostly independent of any GROMACS-specific rules, except
+for the following:
+ - DocType.library is a GROMACS-specific construct that is deduced from the
+   contents of the detailed description (presence of a \libinternal command in
+   the Doxygen comment triggers it).
+ - DocType.internal is deduced from the presence of a \internal command that
+   covers the whole detailed description.
+ - List of extensions for determining whether a file is a source file only
+   contains extensions actually used by GROMACS.
+It would be possible to move these out from this file, but that would require
+exposing the XML representation for the descriptions, which is not nice either.
+
+The module can also be run as a script that can dump out different parts of the
+object model.  This can be used to debug the parser, as well as check what is
+actually in the XML documentation.
+"""
+
+import os.path
+import xml.etree.ElementTree as ET
+
+import reporter
+
+#####################################################################
+# Helper functions and classes
+
+def _show_list(title, objlist):
+    """Helper function for formatting a list of objects for debug output."""
+    if objlist:
+        print '{0}:'.format(title)
+        for obj in objlist:
+            print '  ', obj
+
+class DocType(object):
+
+    """Documentation visibility in the generated documentation.
+
+    Instances wrap an integer index into _names; a larger value means more
+    visible documentation (none < internal < library < public).
+    """
+
+    # Mapping to string representations for the internal integer values
+    _names = ['none', 'internal', 'library', 'public']
+
+    def __init__(self, value):
+        """Initialize a DocType instance.
+
+        DocType.{none,internal,library,public} should be used outside the class
+        instead of calling the constructor.
+        """
+        self._value = value
+
+    def __str__(self):
+        """Return string representation for the documentation type."""
+        return self._names[self._value]
+
+    def __cmp__(self, other):
+        """Order documentation types in the order of visibility.
+
+        This is the Python 2 comparison hook; other must be another DocType.
+        """
+        return cmp(self._value, other._value)
+
+# Static values for documentation types.
+# These are attached after the class body, since they are instances of the
+# class itself.
+DocType.none = DocType(0)
+DocType.internal = DocType(1)
+DocType.library = DocType(2)
+DocType.public = DocType(3)
+
+class Location(object):
+
+    """Location of a Doxygen entity.
+
+    This class contains the logic to parse a <location> tag in Doxygen XML.
+    It is used as the entity location in cases where body location is not
+    expected, or as part of a LocationWithBody.
+    """
+
+    def __init__(self, elem):
+        """Initialize location from a <location> element.
+
+        The element must have 'file', 'line' and 'column' attributes.
+        """
+        self.filepath = elem.attrib['file']
+        self.line = int(elem.attrib['line'])
+        # Unlike the line, the column is kept as the raw attribute string.
+        self.column = elem.attrib['column']
+
+    def __str__(self):
+        """Return the location formatted as 'file:line'."""
+        return '{0}:{1}'.format(self.filepath, self.line)
+
+    def get_reporter_location(self):
+        """Return a reporter.Location for the file and line of this location."""
+        return reporter.Location(self.filepath, self.line)
+
+    def get_full_string(self):
+        """Return the location formatted as 'file:line:column'."""
+        return '{0}:{1}:{2}'.format(self.filepath, self.line, self.column)
+
+class BodyLocation(object):
+
+    """Body location of a Doxygen entity.
+
+    This class contains the logic to parse a body location from a <location>
+    tag in Doxygen XML.  Not all entities have these attributes.
+    This is only used as part of a LocationWithBody, which handles cases where
+    the body location is optional.
+
+    The body location can be compared and hashed so that it can be used in
+    a dictionary for DocumentationSet.merge_duplicates().
+    """
+
+    def __init__(self, elem):
+        """Initialize body location from a <location> element."""
+        self.filepath = elem.attrib['bodyfile']
+        self.startline = int(elem.attrib['bodystart'])
+        self.endline = int(elem.attrib['bodyend'])
+
+    def __cmp__(self, other):
+        result = cmp(self.filepath, other.filepath)
+        if result == 0:
+            result = cmp(self.startline, other.startline)
+        if result == 0:
+            result = cmp(self.endline, other.endline)
+        return result
+
+    def __hash__(self):
+        return hash(self.filepath) ^ hash(self.startline) ^ hash(self.endline)
+
+    def __str__(self):
+        return '{0}:{1}'.format(self.filepath, self.startline)
+
+    def get_full_string(self):
+        if self.endline < 0:
+            return self.__str__()
+        return '{0}:{1}-{2}'.format(self.filepath, self.startline, self.endline)
+
+class LocationWithBody(object):
+
+    """Location for a Doxygen entity that can have a body location.
+
+    This class is used to represent the location of a Doxygen entity that can
+    have a body location.
+    """
+
+    def __init__(self, elem):
+        """Initialize location from a <location> element."""
+        self._location = Location(elem)
+        if 'bodyfile' in elem.attrib:
+            self._bodylocation = BodyLocation(elem)
+        else:
+            self._bodylocation = None
+
+    def __str__(self):
+        if not self._bodylocation:
+            return '{0} (no body)'.format(self._location)
+        else:
+            return '{0} / {1}'.format(self._location, self._bodylocation)
+
+    def get_reporter_location(self):
+        """Return reporter location for this location.
+
+        All issues are reported at the main location, which should match with
+        the declaration, where most of the documentation typically is.
+        """
+        return self._location.get_reporter_location()
+
+    def get_location(self):
+        return self._location
+
+    def get_body_location(self):
+        return self._bodylocation
+
+    def has_same_body_location(self):
+        """Check whether main location matches body location.
+
+        If the main location is different, then it likely points to the
+        declaration of the function.
+        """
+        return self._location.filepath == self._bodylocation.filepath and \
+                self._location.line == self._bodylocation.startline
+
+class MemberSection(object):
+
+    """Section of members within a compound entity."""
+
+    def __init__(self, kind):
+        self._kind = kind
+        self._members = []
+
+    def __str__(self):
+        return self._kind
+
+    def add_member(self, member):
+        self._members.append(member)
+
+    def replace_member(self, old, new):
+        try:
+            pos = self._members.index(old)
+        except ValueError:
+            return
+        self._members[pos] = new
+
+#####################################################################
+# Documentation entities
+
+class Entity(object):
+
+    """Doxygen documentation entity.
+
+    This class represents common properties of an entity that can contain
+    Doxygen documentation.
+    """
+
+    def __init__(self, name, refid):
+        """Initialize an entity with the given name and Doxygen reference id.
+
+        The entity starts as undocumented (visibility DocType.none) until
+        _process_descriptions() is called.
+        """
+        self._docset = None
+        self._name = name
+        self._id = refid
+        self._has_brief_description = False
+        self._has_detailed_description = False
+        self._has_inbody_description = False
+        self._visibility = DocType.none
+
+    def __str__(self):
+        """Return the name of the entity."""
+        return self._name
+
+    def _get_reporter(self):
+        """Return reporter to use for parsing issues."""
+        return self._docset.get_reporter()
+
+    def set_documentation_set(self, docset):
+        """Set the documentation set this entity belongs to.
+
+        The documentation set parent provides access to a common reporter
+        object, and also allows the entity to resolve references to other
+        entities while loading XML information.
+        """
+        assert self._docset is None
+        self._docset = docset
+
+    def get_id(self):
+        """Return the reference id given at construction."""
+        return self._id
+
+    def get_name(self):
+        """Return the name of the entity."""
+        return self._name
+
+    def get_reporter_location(self):
+        """Return a reporter location that only identifies the entity by name.
+
+        The location is formatted as '<name>' without a line number.
+        """
+        return reporter.Location('<{0}>'.format(self._name), None)
+
+    def get_visibility(self):
+        """Return the DocType deduced from the descriptions."""
+        return self._visibility
+
+    def is_documented(self):
+        """Return whether the visibility is something else than DocType.none."""
+        return self._visibility != DocType.none
+
+    def has_brief_description(self):
+        """Return whether a non-empty brief description was found."""
+        return self._has_brief_description
+
+    def has_inbody_description(self):
+        """Return whether a non-empty in-body description was found."""
+        return self._has_inbody_description
+
+    def _process_descriptions(self, briefelem, detailselem, inbodyelem):
+        """Deduce documentation flags and visibility from description elements.
+
+        Each argument is an XML element or None.  An element is treated as
+        non-empty if it has at least one child element.
+        """
+        # Note that this local name shadows the reporter module within this
+        # method.
+        reporter = self._get_reporter()
+        if briefelem is not None and len(briefelem) > 0:
+            self._has_brief_description = True
+            self._visibility = DocType.public
+        if detailselem is not None and len(detailselem) > 0:
+            self._visibility = DocType.public
+            # Gromacs-specific:
+            # \internal is used at the beginning of a comment block to
+            # mark the block internal to the module.
+            # \libinternal is used similarly, and inserts custom XML
+            # elements.
+            if detailselem[0].tag == 'internal':
+                # Only an <internal> element that is the sole child of the
+                # detailed description makes the whole entity internal.
+                if len(detailselem) == 1:
+                    self._visibility = DocType.internal
+                else:
+                    # TODO: Should we also check if internal appears elsewhere?
+                    reporter.doc_note(self, '\internal does not cover whole documentation')
+            # The <libinternal> element is only looked for among the direct
+            # children of the first child of the detailed description.
+            if detailselem[0].find('libinternal') is not None:
+                if self._visibility == DocType.public:
+                    self._visibility = DocType.library
+                else:
+                    reporter.doc_error(self, '\libinternal should not be used inside \internal')
+            self._has_detailed_description = True
+        if inbodyelem is not None:
+            self._has_inbody_description = (len(inbodyelem) > 0)
+
+    def show_base(self):
+        """Format information for common properties.
+
+        This is called from subclass show() methods to show base information
+        about the entity.
+        """
+        print 'ID:         {0}'.format(self._id)
+        print 'Name:       {0}'.format(self._name)
+        print 'Location:   {0}'.format(self.get_reporter_location())
+        # Collect the kinds of descriptions that were found for the entity.
+        doctype = []
+        if self._has_brief_description:
+            doctype.append('brief')
+        if self._has_detailed_description:
+            doctype.append('details')
+        if self._has_inbody_description:
+            doctype.append('in-body')
+        if not doctype:
+            doctype.append('none')
+        print 'Doc:        {0}'.format(', '.join(doctype))
+        print 'Visibility: {0}'.format(self._visibility)
+
+# Member entities
+
+class Member(Entity):
+
+    """Member entity.
+
+    In Doxygen, a member entity is an entity such as a function or an enum that
+    cannot contain other documented entities (an enum is a slight exception, as
+    enum values are still nested within the enum member).  A member always
+    belongs to one (or more) compounds, which means that the detailed
+    documentation for the member appears on the documentation page for that
+    compound.  If none of the parent compounds are documented, the member
+    doesn't appear anywhere, even if it is documented.
+
+    Member information is loaded from a parent compound's XML file.  If there
+    is more than one parent, the first one encountered will be used
+    (presumably, Doxygen duplicates the information into each XML file).
+    """
+
+    def __init__(self, name, refid):
+        """Create a member whose details have not been loaded yet."""
+        Entity.__init__(self, name, refid)
+        # Compounds that contain this member.
+        self._parents = set()
+        # Set from the <location> element in load_details_from_element().
+        self._location = None
+        # Definitions folded into this member by merge_definition().
+        self._alternates = set()
+        # True once load_details_from_element() has run to completion.
+        self._loaded = False
+        # TODO: Move to Entity?
+        self._xmlpath = None
+
+    def add_parent_compound(self, compound):
+        """Add a compound that contains this member."""
+        self._parents.add(compound)
+
+    def _get_raw_location(self):
+        """Returns the BodyLocation object associated with this member.
+
+        This is necessary so that EnumValue can override it to report a
+        non-empty location: Doxygen doesn't provide any location for
+        <enumvalue>.
+        """
+        return self._location
+
+    def get_parent_compounds(self):
+        """Return the set of compounds that contain this member."""
+        return self._parents
+
+    def get_inherited_visibility(self):
+        """Return the maximum of the visibilities of the parent compounds."""
+        return max([parent.get_visibility() for parent in self._parents])
+
+    def is_visible(self):
+        """Return True unless the inherited visibility is DocType.none."""
+        return self.get_inherited_visibility() != DocType.none
+
+    # The accessors below forward to the location object returned by
+    # _get_raw_location(), which is None until a <location> element has been
+    # loaded for the member.
+
+    def has_same_body_location(self):
+        """Forward has_same_body_location() to the member's location object."""
+        return self._get_raw_location().has_same_body_location()
+
+    def get_reporter_location(self):
+        """Forward get_reporter_location() to the member's location object."""
+        return self._get_raw_location().get_reporter_location()
+
+    def get_location(self):
+        """Forward get_location() to the member's location object."""
+        return self._get_raw_location().get_location()
+
+    def get_body_location(self):
+        """Forward get_body_location() to the member's location object."""
+        return self._get_raw_location().get_body_location()
+
+    def merge_definition(self, definition):
+        """Take over the parents of another member and record it as an alternate."""
+        self._parents.update(definition._parents)
+        self._alternates.add(definition)
+
+    def load_details_from_element(self, rootelem, xmlpath):
+        """Load details for the member from a given XML element.
+
+        This method is called when encountering member definitions while
+        processing a compound XML file to load the information for that member.
+        It processes common properties for a member, and delegates other
+        elements to _load_element().
+        """
+        if self._loaded:
+            # TODO: It would be nice to verify that the same information
+            # is present in all instances
+            return
+        self._xmlpath = xmlpath
+        # TODO: Process the attributes
+        reporter = self._get_reporter()
+        briefelem = None
+        detailselem = None
+        inbodyelem = None
+        for elem in rootelem:
+            if elem.tag == 'name':
+                if elem.text != self.get_name():
+                    reporter.xml_assert(xmlpath,
+                            "member name mismatch: '{0}' (in index.xml) vs. '{1}'".format(
+                                self.get_name(), elem.text))
+            elif elem.tag == 'briefdescription':
+                briefelem = elem
+            elif elem.tag == 'detaileddescription':
+                detailselem = elem
+            elif elem.tag == 'inbodydescription':
+                # TODO: in-body description is probably only possible for
+                # functions; move it there.
+                inbodyelem = elem
+            elif elem.tag == 'location':
+                self._location = LocationWithBody(elem)
+            else:
+                if not self._load_element(elem):
+                    # TODO Process the rest of the elements so that we can check this
+                    #reporter.xml_assert(xmlpath,
+                    #        "unknown member child element '{0}'".format(elem.tag))
+                    pass
+        # Descriptions are handled only after all child elements were seen.
+        self._process_descriptions(briefelem, detailselem, inbodyelem)
+        self._loaded = True
+
+    def _load_element(self, element):
+        """Load data from a child XML element.
+
+        This method is called for all XML elements under the <memberdef>
+        element that are not handled directly by the Member class.
+        Derived classes should return True if they process the element.
+        """
+        return False
+
+    def show(self):
+        """Print the base properties, inherited visibility, locations and parents."""
+        self.show_base()
+        print 'Parent vis: {0}'.format(self.get_inherited_visibility())
+        print 'Location:   {0}'.format(self.get_location().get_full_string())
+        print 'Body loc:   {0}'.format(self.get_body_location().get_full_string())
+        _show_list('Parents', self._parents)
+
+class Define(Member):
+    """Member type used for index entries of kind 'define'."""
+    pass
+
+class Variable(Member):
+    """Member type used for index entries of kind 'variable'."""
+    pass
+
+class Typedef(Member):
+    """Member type used for index entries of kind 'typedef'."""
+    pass
+
+class Enum(Member):
+
+    """Enum member that also keeps track of its enumeration values."""
+
+    def __init__(self, name, refid):
+        Member.__init__(self, name, refid)
+        self._values = set()
+
+    def _load_element(self, elem):
+        """Load an <enumvalue> child; other elements are not handled."""
+        if elem.tag != 'enumvalue':
+            return False
+        value_id = elem.attrib['id']
+        # Doxygen seems to sometimes assign the same ID to a singleton enum
+        # value (this already triggers a warning in loading index.xml).
+        # Such a value is accepted, but not loaded as a separate member.
+        if value_id != self.get_id():
+            value = self._docset.get_member(value_id)
+            value.set_enum(self)
+            value.load_details_from_element(elem, self._xmlpath)
+            self._values.add(value)
+        return True
+
+    def get_values(self):
+        """Return the set of enumeration values loaded for this enum."""
+        return self._values
+
+class EnumValue(Member):
+    """Member type used for index entries of kind 'enumvalue'."""
+
+    def __init__(self, name, refid):
+        Member.__init__(self, name, refid)
+        # Enum that contains this value; assigned once through set_enum().
+        self._enum = None
+
+    def set_enum(self, member):
+        """Set the containing enum; asserts that it has not been set before."""
+        assert self._enum is None
+        self._enum = member
+
+    def _get_raw_location(self):
+        """Return the location of the containing enum.
+
+        NOTE(review): this fails if set_enum() has not been called; confirm
+        that all callers only reach this after the enum has been loaded.
+        """
+        return self._enum._get_raw_location()
+
+class Function(Member):
+    """Member type used for index entries of kind 'function'."""
+    pass
+
+class FriendDeclaration(Member):
+    """Member type used for index entries of kind 'friend'."""
+    pass
+
+# Compound entities
+
+class Compound(Entity):
+
+    """Compound entity.
+
+    In Doxygen, a compound entity is an entity that has its own documentation
+    page, and can contain other documented entities (either members, or other
+    compounds).  Examples of compounds are files and classes.
+    A compound entity always appears in the documentation, even if it is
+    contained in another compound that is not documented.
+
+    The list of members for a compound is initialized when the XML index file
+    is read.  All other information is loaded from an XML file that is specific
+    to the compound.  In addition to describing the compound, this XML file
+    contains references to contained compounds, and details of all members
+    within the compound.
+    """
+    def __init__(self, name, refid):
+        Entity.__init__(self, name, refid)
+        # Members by ID; after replace_member(), the key can differ from the
+        # ID of the stored member.
+        self._members = dict()
+        # Compounds referenced by <inner*> elements.
+        self._children = set()
+        # MemberSection objects in the order they appear in the XML file.
+        self._sections = []
+        # Groups that contain this compound.
+        self._groups = set()
+        self._loaded = False
+
+    def _get_xml_path(self):
+        """Return path to the details XML file for this compound."""
+        return os.path.join(self._docset.get_xmlroot(), self.get_id() + '.xml')
+
+    def add_member(self, member):
+        """Add a contained member."""
+        self._members[member.get_id()] = member
+
+    def add_group(self, compound):
+        """Add a group (a compound entity) that contains this entity."""
+        self._groups.add(compound)
+
+    def replace_member(self, old, new):
+        """Make the ID of an existing member refer to another member.
+
+        Raises ValueError if the old member is not in this compound, or if
+        the new member already is.  The replacement is also forwarded to all
+        member sections.
+        """
+        if old.get_id() not in self._members:
+            raise ValueError("Trying to replace a non-existent member")
+        elif new.get_id() in self._members:
+            raise ValueError("Trying to replace with an existing member")
+        self._members[old.get_id()] = new
+        for section in self._sections:
+            section.replace_member(old, new)
+
+    def load_details(self):
+        """Load details for the compound from its details XML file.
+
+        This method processes common properties for a compound.
+        References to inner compounds are delegated to _load_inner_*() methods,
+        and all members encountered in the XML file are loaded with
+        Member.load_details_from_element().
+        Other elements are delegated to _load_element().
+        """
+        if self._loaded:
+            return
+        reporter = self._get_reporter()
+        xmlpath = self._get_xml_path()
+        compoundtree = ET.parse(xmlpath)
+        root = compoundtree.getroot()
+        if len(root) > 1:
+            reporter.xml_assert(xmlpath, "more than one compound in a file")
+        # An empty root element is reported in the same way as a wrong first
+        # tag, instead of failing with an IndexError on root[0].
+        if len(root) == 0 or root[0].tag != 'compounddef':
+            reporter.xml_assert(xmlpath, "expected <compounddef> as the first tag")
+            return
+        briefelem = None
+        detailselem = None
+        # Members from index.xml that have not yet been seen in any section.
+        missing_members = set(self._members.values())
+        for elem in root[0]:
+            if elem.tag == 'compoundname':
+                if elem.text != self.get_name():
+                    reporter.xml_assert(xmlpath,
+                            "compound name mismatch: '{0}' (in index.xml) vs. '{1}'"
+                            .format(self.get_name(), elem.text))
+            elif elem.tag == 'briefdescription':
+                briefelem = elem
+            elif elem.tag == 'detaileddescription':
+                detailselem = elem
+            elif elem.tag in ('includes', 'includedby', 'incdepgraph',
+                    'invincdepgraph', 'inheritancegraph', 'collaborationgraph',
+                    'programlisting', 'templateparamlist', 'listofallmembers'):
+                # Known elements that are intentionally ignored.
+                pass
+            elif elem.tag.startswith('inner'):
+                refid = elem.attrib['refid']
+                # The tag is 'inner' followed by the type of the compound.
+                reftype = elem.tag[5:]
+                # TODO: Handle 'prot' attribute?
+                refcompound = self._docset.get_compound(refid)
+                self._children.add(refcompound)
+                if reftype == 'file':
+                    self._load_inner_file(refcompound)
+                elif reftype == 'dir':
+                    self._load_inner_dir(refcompound)
+                elif reftype == 'group':
+                    self._load_inner_group(refcompound)
+                elif reftype == 'namespace':
+                    self._load_inner_namespace(refcompound)
+                elif reftype == 'class':
+                    self._load_inner_class(refcompound)
+                else:
+                    reporter.xml_assert(xmlpath,
+                            "unknown inner compound type '{0}'".format(reftype))
+            elif elem.tag == 'sectiondef':
+                # TODO: Handle header and description elements
+                kind = elem.attrib['kind']
+                section = MemberSection(kind)
+                self._sections.append(section)
+                for memberelem in elem.iter('memberdef'):
+                    refid = memberelem.attrib['id']
+                    member = self._members[refid]
+                    member.load_details_from_element(memberelem, xmlpath)
+                    section.add_member(member)
+                    if member in missing_members:
+                        missing_members.remove(member)
+                    # Enum values need special handling, but are not worth
+                    # extra generalization.
+                    if isinstance(member, Enum):
+                        missing_members.difference_update(member.get_values())
+            else:
+                if not self._load_element(elem):
+                    reporter.xml_assert(xmlpath,
+                            "unknown compound child element '{0}'".format(elem.tag))
+        if missing_members:
+            reporter.xml_assert(xmlpath, 'members without section')
+        self._process_descriptions(briefelem, detailselem, None)
+        self._loaded = True
+
+    def _unexpected_inner_compound(self, typename, compound):
+        """Report a parsing error for an unexpected inner compound reference."""
+        reporter = self._get_reporter()
+        xmlpath = self._get_xml_path()
+        reporter.xml_assert(xmlpath,
+                "unexpected inner {0}: {1}".format(typename, compound))
+
+    def _load_inner_file(self, compound):
+        """Process a reference to an inner file.
+
+        Derived classes should override the method if the compound type can
+        contain files as nested compounds.
+        """
+        self._unexpected_inner_compound("file", compound)
+
+    def _load_inner_dir(self, compound):
+        """Process a reference to an inner directory.
+
+        Derived classes should override the method if the compound type can
+        contain directories as nested compounds.
+        """
+        self._unexpected_inner_compound("dir", compound)
+
+    def _load_inner_group(self, compound):
+        """Process a reference to an inner group.
+
+        Derived classes should override the method if the compound type can
+        contain groups as nested compounds.
+        """
+        self._unexpected_inner_compound("group", compound)
+
+    def _load_inner_namespace(self, compound):
+        """Process a reference to an inner namespace.
+
+        Derived classes should override the method if the compound type can
+        contain namespaces as nested compounds.
+        """
+        self._unexpected_inner_compound("namespace", compound)
+
+    def _load_inner_class(self, compound):
+        """Process a reference to an inner class.
+
+        Derived classes should override the method if the compound type can
+        contain classes as nested compounds.
+        """
+        self._unexpected_inner_compound("class", compound)
+
+    def _load_element(self, element):
+        """Load data from a child XML element.
+
+        This method is called for all XML elements under the <compounddef>
+        element that are not handled directly by the Compound class.
+        Derived classes should return True if they process the element.
+        """
+        return False
+
+    def get_groups(self):
+        """Return the set of groups that contain this compound."""
+        return self._groups
+
+    def show_base(self):
+        """Format information for common properties.
+
+        This extends Entity.show_base() by adding properties that are common to
+        all compounds.
+        """
+        Entity.show_base(self)
+        if self._groups:
+            print 'Groups:   {0}'.format(', '.join(map(str, self._groups)))
+
+    def show_members(self):
+        """Show list of members.
+
+        This method is provided for use in show() methods of derived classes
+        to print the list of members.
+        """
+        for section in self._sections:
+            print 'Member section: {0}'.format(section)
+            for member in section._members:
+                print '  ', member
+
+class File(Compound):
+
+    """Compound that represents a file."""
+
+    def __init__(self, name, refid):
+        Compound.__init__(self, name, refid)
+        self._path = None
+        self._directory = None
+        self._classes = set()
+        self._namespaces = set()
+        # None until the <location> element has been loaded.
+        self._is_source_file = None
+
+    def _load_inner_class(self, compound):
+        """Link a class referenced from this file in both directions."""
+        compound.add_file(self)
+        self._classes.add(compound)
+
+    def _load_inner_namespace(self, compound):
+        """Link a namespace referenced from this file in both directions."""
+        compound.add_file(self)
+        self._namespaces.add(compound)
+
+    def _load_element(self, elem):
+        """Take the path and the source file flag from <location>."""
+        if elem.tag != 'location':
+            return False
+        self._path = elem.attrib['file']
+        _, suffix = os.path.splitext(self._path)
+        self._is_source_file = (suffix in ('.c', '.cpp', '.cu'))
+        return True
+
+    def set_directory(self, directory):
+        """Set the directory that contains this file."""
+        self._directory = directory
+
+    def get_reporter_location(self):
+        """Return a reporter location for the file path, without a line."""
+        return reporter.Location(self._path, None)
+
+    def get_path(self):
+        """Return the path read from the <location> element."""
+        return self._path
+
+    def get_directory(self):
+        """Return the directory set with set_directory()."""
+        return self._directory
+
+    def is_source_file(self):
+        """Return whether the extension is one of .c, .cpp or .cu."""
+        return self._is_source_file
+
+    def show(self):
+        """Print the properties, namespaces, classes and members of the file."""
+        self.show_base()
+        for line in ('Path:      {0}'.format(self._path),
+                     'Directory: {0}'.format(self._directory),
+                     'Source:    {0}'.format(self._is_source_file)):
+            print line
+        _show_list('Namespaces', self._namespaces)
+        _show_list('Classes', self._classes)
+        self.show_members()
+
+class Directory(Compound):
+
+    """Compound that represents a directory."""
+
+    def __init__(self, name, refid):
+        Compound.__init__(self, name, refid)
+        self._path = None
+        self._parent = None
+        self._subdirs = set()
+        self._files = set()
+
+    def _load_inner_file(self, compound):
+        """Record a file in this directory and set its directory."""
+        compound.set_directory(self)
+        self._files.add(compound)
+
+    def _load_inner_dir(self, compound):
+        """Record a subdirectory and set its parent to this directory."""
+        compound._parent = self
+        self._subdirs.add(compound)
+
+    def _load_element(self, elem):
+        """Take the directory path from <location>."""
+        if elem.tag != 'location':
+            return False
+        self._path = elem.attrib['file']
+        return True
+
+    def get_reporter_location(self):
+        """Return a reporter location for the directory path, without a line."""
+        return reporter.Location(self._path, None)
+
+    def get_path(self):
+        """Return the path read from the <location> element."""
+        return self._path
+
+    def get_parent(self):
+        """Return the parent directory, or None if none has been set."""
+        return self._parent
+
+    def get_subdirectories(self):
+        """Return the set of subdirectories."""
+        return self._subdirs
+
+    def show(self):
+        """Print the properties, subdirectories and files of the directory."""
+        self.show_base()
+        print 'Path:      {0}'.format(self._path)
+        parent = self._parent
+        if parent:
+            print 'Parent:    {0}'.format(parent)
+        _show_list('Subdirectories', self._subdirs)
+        _show_list('Files', self._files)
+
+class Group(Compound):
+
+    """Compound that represents a Doxygen group."""
+
+    def __init__(self, name, refid):
+        Compound.__init__(self, name, refid)
+        self._title = None
+        self._files = set()
+        self._nestedgroups = set()
+        self._namespaces = set()
+        self._classes = set()
+
+    def _adopt(self, compound, container):
+        """Tell a compound that it is in this group and store it."""
+        compound.add_group(self)
+        container.add(compound)
+
+    def _load_inner_file(self, compound):
+        self._adopt(compound, self._files)
+
+    # Doxygen 1.8.5 doesn't seem to put the directories into the XML output,
+    # even though they are in the HTML output as group members...
+
+    def _load_inner_group(self, compound):
+        self._adopt(compound, self._nestedgroups)
+
+    def _load_inner_namespace(self, compound):
+        self._adopt(compound, self._namespaces)
+
+    def _load_inner_class(self, compound):
+        self._adopt(compound, self._classes)
+
+    def _load_element(self, elem):
+        """Take the group title from <title>."""
+        if elem.tag != 'title':
+            return False
+        self._title = elem.text
+        return True
+
+    def show(self):
+        """Print the properties, inner compounds and members of the group."""
+        self.show_base()
+        print 'Title:     {0}'.format(self._title)
+        print 'Inner compounds:'
+        for inner in self._children:
+            print '  ', inner
+        self.show_members()
+
+class Namespace(Compound):
+
+    """Compound that represents a namespace."""
+
+    def __init__(self, name, refid):
+        Compound.__init__(self, name, refid)
+        self._doclocation = None
+        self._files = set()
+        self._parent = None
+        self._innernamespaces = set()
+        self._classes = set()
+
+    def _load_inner_namespace(self, compound):
+        """Record a nested namespace and set its parent to this namespace."""
+        compound._parent = self
+        self._innernamespaces.add(compound)
+
+    def _load_inner_class(self, compound):
+        """Record a class and set its namespace to this namespace."""
+        compound.set_namespace(self)
+        self._classes.add(compound)
+
+    def _load_element(self, elem):
+        """Take the documentation location from <location>."""
+        if elem.tag != 'location':
+            return False
+        self._doclocation = Location(elem)
+        return True
+
+    def add_file(self, compound):
+        """Add a file that references this namespace."""
+        self._files.add(compound)
+
+    def get_reporter_location(self):
+        """Return the reporter location of the documentation location."""
+        doclocation = self._doclocation
+        return doclocation.get_reporter_location()
+
+    def show(self):
+        """Print the properties, inner namespaces, classes and members."""
+        self.show_base()
+        doclocation = self._doclocation
+        print 'Doc. loc.: {0}'.format(doclocation.get_full_string())
+        _show_list('Inner namespaces', self._innernamespaces)
+        _show_list('Classes', self._classes)
+        self.show_members()
+
+class Class(Compound):
+
+    """Compound used for Doxygen kinds 'class', 'struct' and 'union'."""
+
+    def __init__(self, name, refid):
+        Compound.__init__(self, name, refid)
+        self._location = None
+        self._namespace = None
+        self._files = set()
+        self._baseclasses = []
+        self._derivedclasses = set()
+        self._outerclass = None
+        self._innerclasses = set()
+
+    def _load_inner_class(self, compound):
+        """Record a nested class and set its outer class to this class."""
+        compound.set_outer_class(self)
+        self._innerclasses.add(compound)
+
+    def _load_element(self, elem):
+        """Handle base and derived class references and <location>."""
+        tag = elem.tag
+        if tag == 'basecompoundref':
+            # TODO: Handle unknown bases?
+            # A base without a refid is accepted, but not recorded.
+            if 'refid' in elem.attrib:
+                # TODO: Handle prot and virt attributes, check name?
+                base = self._docset.get_compound(elem.attrib['refid'])
+                self._baseclasses.append(base)
+            return True
+        if tag == 'derivedcompoundref':
+            # TODO: Handle prot and virt attributes, check name?
+            derived = self._docset.get_compound(elem.attrib['refid'])
+            self._derivedclasses.add(derived)
+            return True
+        if tag == 'location':
+            self._location = LocationWithBody(elem)
+            return True
+        return False
+
+    def add_file(self, compound):
+        """Add a file that references this class."""
+        self._files.add(compound)
+
+    def set_namespace(self, compound):
+        """Set the namespace that contains this class."""
+        self._namespace = compound
+
+    def set_outer_class(self, compound):
+        """Set the class that contains this class."""
+        self._outerclass = compound
+
+    def get_reporter_location(self):
+        """Return the reporter location from the class location."""
+        return self._location.get_reporter_location()
+
+    def get_files(self):
+        """Return the set of files that reference this class."""
+        return self._files
+
+    def is_local(self):
+        """Return True if at most one file references the class, and that
+        file (if any) is a source file."""
+        return (len(self._files) <= 1 and
+                all(fileobj.is_source_file() for fileobj in self._files))
+
+    def show(self):
+        """Print the properties, locations, inner classes and members."""
+        self.show_base()
+        print 'Namespace:  {0}'.format(self._namespace)
+        outer = self._outerclass
+        if outer:
+            print 'Outer cls:  {0}'.format(outer)
+        declared = self._location.get_location()
+        print 'Location:   {0}'.format(declared.get_full_string())
+        body = self._location.get_body_location()
+        print 'Body loc:   {0}'.format(body.get_full_string())
+        _show_list('Inner classes', self._innerclasses)
+        self.show_members()
+
+#####################################################################
+# Top-level container class
+
+def _get_compound_type_from_kind(kind):
+    """Return the compound class for a Doxygen XML kind, or None if unknown."""
+    if kind in ('class', 'struct', 'union'):
+        return Class
+    if kind == 'namespace':
+        return Namespace
+    if kind == 'group':
+        return Group
+    if kind == 'dir':
+        return Directory
+    if kind == 'file':
+        return File
+    return None
+
+def _get_member_type_from_kind(kind):
+    """Return the member class for a Doxygen XML kind, or None if unknown."""
+    if kind == 'function':
+        return Function
+    if kind == 'variable':
+        return Variable
+    if kind == 'typedef':
+        return Typedef
+    if kind == 'define':
+        return Define
+    if kind == 'enum':
+        return Enum
+    if kind == 'enumvalue':
+        return EnumValue
+    if kind == 'friend':
+        return FriendDeclaration
+    return None
+
+class DocumentationSet(object):
+
+    """Root object for Doxygen XML documentation tree.
+
+    On initialization, it reads the index.xml file from the Doxygen XML output,
+    which contains the list of entities.  Only the ID and name for the entities,
+    and the parent compounds for members, are available from this file.
+
+    load_details() can be called to load the detailed compound XML files.
+    This constructs relations between compound entities, and initializes other
+    attributes for the entities.
+
+    merge_duplicates() can then be called to remove members with different IDs,
+    but that actually reference the same code entity.  For some reason, Doxygen
+    seems to produce these in certain cases.
+    """
+
+    def __init__(self, xmlroot, reporter):
+        """Initialize the documentation set and read index data."""
+        self._xmlroot = xmlroot
+        self._reporter = reporter
+        xmlpath = os.path.join(xmlroot, 'index.xml')
+        indextree = ET.parse(xmlpath)
+        self._compounds = dict()
+        self._members = dict()
+        self._files = dict()
+        for compoundelem in indextree.getroot():
+            name = compoundelem.find('name').text
+            refid = compoundelem.attrib['refid']
+            kind = compoundelem.attrib['kind']
+            if kind in ('page', 'example'):
+                # TODO: Model these types as well
+                continue
+            compoundtype = _get_compound_type_from_kind(kind)
+            if compoundtype is None:
+                reporter.xml_assert(xmlpath,
+                        "unknown compound kind '{0}'".format(kind))
+                continue
+            compound = compoundtype(name, refid)
+            compound.set_documentation_set(self)
+            self._compounds[refid] = compound
+            for memberelem in compoundelem.iter('member'):
+                name = memberelem.find('name').text
+                refid = memberelem.attrib['refid']
+                kind = memberelem.attrib['kind']
+                # Resolve the type before looking at existing members, so
+                # that an unknown kind is reported also for an ID that has
+                # already been seen (isinstance() does not accept None).
+                membertype = _get_member_type_from_kind(kind)
+                if membertype is None:
+                    reporter.xml_assert(xmlpath,
+                            "unknown member kind '{0}'".format(kind))
+                    continue
+                if refid in self._members:
+                    member = self._members[refid]
+                    if not isinstance(member, membertype):
+                        reporter.xml_assert(xmlpath,
+                                "id '{0}' used for multiple kinds of members"
+                                .format(refid))
+                        continue
+                else:
+                    member = membertype(name, refid)
+                    member.set_documentation_set(self)
+                    self._members[refid] = member
+                member.add_parent_compound(compound)
+                compound.add_member(member)
+
+    def load_details(self):
+        """Load detailed XML files for each compound."""
+        for compound in self._compounds.itervalues():
+            compound.load_details()
+            if isinstance(compound, File):
+                self._files[compound.get_path()] = compound
+        # TODO: Add links to files using location
+
+    def merge_duplicates(self):
+        """Merge duplicate member definitions based on body location.
+
+        At least for functions that are declared in a header, but have their
+        body in a source file, Doxygen seems to create two different IDs, but
+        the contents of the members are the same, except for the location
+        attribute.  This method merges members that have identical name and
+        body location into a single member that keeps the information from both
+        instances (they should only differ in the location attribute and in
+        parent compounds).  Both IDs point to the merged member after this
+        method.
+        """
+        members_by_body = dict()
+        for member in self._members.itervalues():
+            bodyloc = member.get_body_location()
+            if bodyloc:
+                index = (bodyloc, type(member), member.get_name())
+                if index not in members_by_body:
+                    members_by_body[index] = []
+                members_by_body[index].append(member)
+        for memberlist in members_by_body.itervalues():
+            if len(memberlist) > 1:
+                declaration = None
+                otherdeclarations = []
+                definition = None
+                for member in memberlist:
+                    if member.has_same_body_location():
+                        if definition is not None:
+                            self._reporter.xml_assert(None,
+                                    "duplicate definition for a member '{0}'"
+                                    .format(definition))
+                            continue
+                        definition = member
+                    elif declaration is None:
+                        declaration = member
+                    else:
+                        otherdeclarations.append(member)
+                if otherdeclarations:
+                    # TODO: gmx_cpuid.c produces some false positives
+                    details = []
+                    for otherdeclaration in otherdeclarations:
+                        details.append('{0}: another declaration is here'
+                                .format(otherdeclaration.get_reporter_location()))
+                    details.append('{0}: definition is here'
+                            .format(declaration.get_body_location()))
+                    text = "duplicate declarations for a member '{0}'".format(declaration)
+                    self._reporter.code_issue(declaration, text, details)
+                    continue
+                if declaration is None or definition is None:
+                    # Only definitions (already reported above as duplicates)
+                    # were found, so there is no declaration/definition pair
+                    # to merge.  Without this check, None would be stored
+                    # into self._members.
+                    continue
+                self._members[definition.get_id()] = declaration
+                declaration.merge_definition(definition)
+                for compound in definition.get_parent_compounds():
+                    compound.replace_member(definition, declaration)
+
+    def get_reporter(self):
+        """Return reporter object to use for reporting issues.
+
+        This method is used in the entity classes to access the reporter when
+        they are parsing the XML files.
+        """
+        return self._reporter
+
+    def get_xmlroot(self):
+        """Return root of the Doxygen XML directory."""
+        return self._xmlroot
+
+    def get_compound(self, refid):
+        """Return the compound with the given ID (KeyError if unknown)."""
+        return self._compounds[refid]
+
+    def get_member(self, refid):
+        """Return the member with the given ID (KeyError if unknown)."""
+        return self._members[refid]
+
+    def get_compounds(self, types, predicate=None):
+        """Return a list of compounds of given type(s).
+
+        types is passed to isinstance().  If predicate is given, only
+        compounds for which it returns a true value are included.
+        """
+        result = []
+        for compound in self._compounds.itervalues():
+            if isinstance(compound, types) and \
+                    (predicate is None or predicate(compound)):
+                result.append(compound)
+        return result
+
+    def get_members(self, types=None, predicate=None):
+        """Return a list of unique members, optionally filtered.
+
+        types (if not None) is passed to isinstance().  If predicate is
+        given, only members for which it returns a true value are included.
+        """
+        # self._members can contain duplicates because of merge_duplicates()
+        result = set()
+        for member in self._members.itervalues():
+            if (types is None or isinstance(member, types)) and \
+                    (predicate is None or predicate(member)):
+                result.add(member)
+        return list(result)
+
+    def get_files(self, paths=None):
+        """Return files whose name ends with paths, or all files.
+
+        paths is passed to str.endswith(), so it can be a string or a tuple
+        of strings.
+        """
+        if paths:
+            return self.get_compounds(File, lambda x: x.get_name().endswith(paths))
+        else:
+            return self.get_compounds(File)
+
+    def get_directories(self, paths):
+        """Return directories whose name ends with paths (see get_files())."""
+        return self.get_compounds(Directory, lambda x: x.get_name().endswith(paths))
+
+    def get_groups(self, name):
+        """Return groups whose name is contained in name (tested with 'in')."""
+        return self.get_compounds(Group, lambda x: x.get_name() in name)
+
+    def get_namespaces(self, name):
+        """Return namespaces whose name is contained in name (tested with 'in')."""
+        return self.get_compounds(Namespace, lambda x: x.get_name() in name)
+
+    def get_classes(self, name=None):
+        """Return classes whose name is contained in name, or all classes."""
+        if name:
+            return self.get_compounds(Class, lambda x: x.get_name() in name)
+        else:
+            return self.get_compounds(Class)
+
+    def get_functions(self, name):
+        """Return members whose name is contained in name (tested with 'in').
+
+        NOTE(review): this matches all Member types, not only Function;
+        confirm whether that is intentional.
+        """
+        return self.get_members(Member, lambda x: x.get_name() in name)
+
+#####################################################################
+# Code for running in script mode
+
+def main():
+    """Run the script for debugging/Doxygen XML output inspection.
+
+    Loads the Doxygen XML output from the directory given with -R, merges
+    duplicate members, and calls show() for each entity selected with the
+    --show-* options.  Exits with a usage error if -R is not given.
+    """
+    import sys
+
+    from optparse import OptionParser
+
+    from reporter import Reporter
+
+    parser = OptionParser()
+    parser.add_option('-R', '--root-dir',
+                      help='Doxygen XML root directory')
+    parser.add_option('-F', '--show-file', action='append',
+                      help='Show contents of given file')
+    parser.add_option('-d', '--show-dir', action='append',
+                      help='Show contents of given directory')
+    parser.add_option('-g', '--show-group', action='append',
+                      help='Show contents of given group')
+    parser.add_option('-n', '--show-namespace', action='append',
+                      help='Show contents of given namespace')
+    parser.add_option('-c', '--show-class', action='append',
+                      help='Show contents of given class')
+    # TODO: Add option for other types, and make them work
+    parser.add_option('-f', '--show-function', action='append',
+                      help='Show details of given function')
+    options, args = parser.parse_args()
+    if not options.root_dir:
+        # Without this, os.path.join() in DocumentationSet fails on None
+        # with a message that does not mention the missing option.
+        parser.error('Doxygen XML root directory must be given with -R/--root-dir')
+
+    reporter = Reporter()
+
+    sys.stderr.write('Loading index.xml...\n')
+    docset = DocumentationSet(options.root_dir, reporter)
+    reporter.write_pending()
+    sys.stderr.write('Loading details...\n')
+    docset.load_details()
+    reporter.write_pending()
+    sys.stderr.write('Processing...\n')
+    docset.merge_duplicates()
+    reporter.write_pending()
+
+    # The option lists are converted to tuples, as the lookup methods pass
+    # them to str.endswith() or use them with the 'in' operator.
+    objlist = []
+    if options.show_file:
+        objlist.extend(docset.get_files(tuple(options.show_file)))
+    if options.show_dir:
+        objlist.extend(docset.get_directories(tuple(options.show_dir)))
+    if options.show_group:
+        objlist.extend(docset.get_groups(tuple(options.show_group)))
+    if options.show_namespace:
+        # TODO: Replace file names with anonymous_namespace{filename}
+        objlist.extend(docset.get_namespaces(tuple(options.show_namespace)))
+    if options.show_class:
+        objlist.extend(docset.get_classes(tuple(options.show_class)))
+    if options.show_function:
+        objlist.extend(docset.get_functions(tuple(options.show_function)))
+    for obj in objlist:
+        obj.show()
+
+if __name__ == '__main__':
+    main()
diff --git a/doxygen/getInstalledHeaders.cmake b/doxygen/getInstalledHeaders.cmake
new file mode 100644 (file)
index 0000000..60fe805
--- /dev/null
@@ -0,0 +1,53 @@
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2014, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+# Write the list of installed headers found in the build tree to a file.
+#
+# Every cmake_install.cmake below BUILDDIR is scanned for lines that mention a
+# path under SRCDIR or BUILDDIR containing ".h"; the matched text of each such
+# line is written to OUTFILE, one entry per line, without a trailing newline.
+#
+#   SRCDIR   - root of the source tree (inserted into the match regex as-is)
+#   BUILDDIR - root of the build tree (searched, and inserted into the regex)
+#   OUTFILE  - file that receives the list; overwritten on every call
+#
+# NOTE(review): the regex is greedy and not anchored at the end, so a line
+# that lists several files yields a single match spanning all of them, and
+# regex metacharacters in SRCDIR/BUILDDIR are not escaped.
+function (generate_installed_file_list SRCDIR BUILDDIR OUTFILE)
+    file(GLOB_RECURSE INSTALL_FILE_LIST "${BUILDDIR}/cmake_install.cmake")
+    set(MATCH_REGEX "(${SRCDIR}|${BUILDDIR})/.*\\.h")
+    set(HEADER_LIST)
+    foreach (INSTALL_FILE ${INSTALL_FILE_LIST})
+        # Quote the path so that it is always passed as exactly one argument.
+        file(STRINGS "${INSTALL_FILE}" HEADER_LINES REGEX "${MATCH_REGEX}")
+        foreach (HEADER_LINE ${HEADER_LINES})
+            # Keep only the part of the line that matched the path pattern.
+            string(REGEX MATCH "${MATCH_REGEX}" HEADER "${HEADER_LINE}")
+            list(APPEND HEADER_LIST "${HEADER}")
+        endforeach ()
+    endforeach ()
+    # Convert the CMake list into newline-separated text for the consumer.
+    string(REPLACE ";" "\n" HEADER_LIST "${HEADER_LIST}")
+    file(WRITE "${OUTFILE}" "${HEADER_LIST}")
+endfunction ()
+
+# Script entry point: all three inputs must be defined by the caller
+# (presumably via -D options when run with "cmake -P" -- confirm against
+# doxygen/CMakeLists.txt).
+if (NOT DEFINED SRCDIR OR NOT DEFINED BUILDDIR OR NOT DEFINED OUTFILE)
+    message(FATAL_ERROR "Required input variable (SRCDIR, BUILDDIR, OUTFILE) not set")
+endif ()
+# Quote the values so that each one is passed as exactly one argument even if
+# it is empty or contains a semicolon.
+generate_installed_file_list("${SRCDIR}" "${BUILDDIR}" "${OUTFILE}")
diff --git a/doxygen/gmxtree.py b/doxygen/gmxtree.py
new file mode 100644 (file)
index 0000000..2595c44
--- /dev/null
@@ -0,0 +1,455 @@
+#!/usr/bin/python
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2014, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+"""GROMACS-specific representation for source tree and documentation.
+
+This module provides classes that construct a GROMACS-specific representation
+of the source tree and associate the Doxygen XML output with it.  It constructs
+an initial representation by walking the source tree in the file system, and
+then associates information from the Doxygen XML output into this.
+It also adds some additional knowledge from how the GROMACS source tree is
+organized to construct a representation that is easy to process and check as
+the top-level scripts expect.
+
+The object model is rooted at a GromacsTree object.  Currently, it constructs a
+representation of the source tree from the file system, but is otherwise mostly
+a thin wrapper around the Doxygen XML tree.  It already adds some relations and
+rules that come from GROMACS-specific knowledge.  In the future, more such
+customizations will be added.
+"""
+
+import os
+import os.path
+
+import doxygenxml as xml
+import reporter
+# We import DocType directly so that it is exposed from this module as well.
+from doxygenxml import DocType
+
+def _get_api_type_for_compound(grouplist):
+    """Helper function to deduce API type from Doxygen group membership."""
+    result = DocType.internal
+    for group in grouplist:
+        if isinstance(group, xml.Group):
+            if group.get_name() == 'group_publicapi':
+                result = DocType.public
+            elif group.get_name() == 'group_libraryapi':
+                result = DocType.library
+            # TODO: Check for multiple group membership
+    return result
+
+class File(object):
+
+    """Source/header file in the GROMACS tree."""
+
+    def __init__(self, path, directory):
+        """Initialize a file representation with basic information."""
+        self._path = path
+        self._dir = directory
+        self._rawdoc = None
+        self._installed = False
+        extension = os.path.splitext(path)[1]
+        self._sourcefile = (extension in ('.c', '.cc', '.cpp', '.cu'))
+        self._apitype = DocType.none
+        self._modules = set()
+
+    def set_doc_xml(self, rawdoc, sourcetree):
+        """Assiociate Doxygen documentation entity with the file."""
+        assert self._rawdoc is None
+        assert rawdoc.is_source_file() == self._sourcefile
+        self._rawdoc = rawdoc
+        if self._rawdoc.is_documented():
+            grouplist = self._rawdoc.get_groups()
+            self._apitype = _get_api_type_for_compound(grouplist)
+            for group in grouplist:
+                module = sourcetree.get_object(group)
+                if module:
+                    self._modules.add(module)
+
+    def set_installed(self):
+        """Mark the file installed."""
+        self._installed = True
+
+    def get_reporter_location(self):
+        return reporter.Location(self._path, None)
+
+    def is_installed(self):
+        return self._installed
+
+    def is_source_file(self):
+        return self._sourcefile
+
+    def is_test_file(self):
+        return self._dir.is_test_directory()
+
+    def is_documented(self):
+        return self._rawdoc and self._rawdoc.is_documented()
+
+    def has_brief_description(self):
+        return self._rawdoc and self._rawdoc.has_brief_description()
+
+    def get_path(self):
+        return self._path
+
+    def get_documentation_type(self):
+        if not self._rawdoc:
+            return DocType.none
+        return self._rawdoc.get_visibility()
+
+    def get_api_type(self):
+        return self._apitype
+
+    def get_expected_module(self):
+        return self._dir.get_module()
+
+    def get_doc_modules(self):
+        return self._modules
+
+class GeneratedFile(File):
+    pass
+
+class Directory(object):
+
+    """(Sub)directory in the GROMACS tree."""
+
+    def __init__(self, path, parent):
+        """Initialize a file representation with basic information."""
+        self._path = path
+        self._name = os.path.basename(path)
+        self._parent = parent
+        self._rawdoc = None
+        self._module = None
+        self._is_test_dir = False
+        if parent and parent.is_test_directory() or \
+                os.path.basename(path) in ('tests', 'legacytests'):
+            self._is_test_dir = True
+        self._subdirs = set()
+        if parent:
+            parent._subdirs.add(self)
+
+    def set_doc_xml(self, rawdoc, sourcetree):
+        """Assiociate Doxygen documentation entity with the directory."""
+        assert self._rawdoc is None
+        assert self._path == rawdoc.get_path().rstrip('/')
+        self._rawdoc = rawdoc
+
+    def set_module(self, module):
+        assert self._module is None
+        self._module = module
+
+    def get_name(self):
+        return self._name
+
+    def get_reporter_location(self):
+        return reporter.Location(self._path, None)
+
+    def is_test_directory(self):
+        return self._is_test_dir
+
+    def get_module(self):
+        if self._module:
+            return self._module
+        if self._parent:
+            return self._parent.get_module()
+        return None
+
+    def get_subdirectories(self):
+        return self._subdirs
+
+class Module(object):
+
+    """Code module in the GROMACS source tree.
+
+    Modules are specific subdirectories that host a more or less coherent
+    set of routines.  Simplified, every subdirectory under src/gromacs/ is
+    a different module.  This object provides that abstraction and also links
+    the subdirectory to the module documentation (documented as a group in
+    Doxygen) if that exists.
+    """
+
+    def __init__(self, name, rootdir):
+        """Create a module with the given name, rooted at rootdir."""
+        self._name = name
+        self._rawdoc = None
+        self._rootdir = rootdir
+
+    def set_doc_xml(self, rawdoc, sourcetree):
+        """Associate Doxygen documentation entity with the module.
+
+        May only be called once per module; sourcetree is not used here.
+        """
+        assert self._rawdoc is None
+        self._rawdoc = rawdoc
+
+    def is_documented(self):
+        """Return whether a Doxygen entity has been associated."""
+        return self._rawdoc is not None
+
+    def get_name(self):
+        """Return the name given when the module was created."""
+        return self._name
+
+class Class(object):
+
+    """Class/struct/union in the GROMACS source code."""
+
+    def __init__(self, rawdoc, files):
+        self._rawdoc = rawdoc
+        self._files = set(files)
+
+    def get_name(self):
+        return self._rawdoc.get_name()
+
+    def get_reporter_location(self):
+        return self._rawdoc.get_reporter_location()
+
+    def get_files(self):
+        return self._files
+
+    def is_documented(self):
+        return self._rawdoc.is_documented()
+
+    def has_brief_description(self):
+        return self._rawdoc.has_brief_description()
+
+    def get_documentation_type(self):
+        if not self.is_documented():
+            return DocType.none
+        if self._rawdoc.is_local():
+            return DocType.internal
+        return self._rawdoc.get_visibility()
+
+    def get_file_documentation_type(self):
+        return max([fileobj.get_documentation_type() for fileobj in self._files])
+
+    def is_in_installed_file(self):
+        return any([fileobj.is_installed() for fileobj in self._files])
+
+class GromacsTree(object):
+
+    """Root object for navigating the GROMACS source tree.
+
+    On initialization, the list of files and directories is initialized by
+    walking the source tree, and modules are created for top-level
+    subdirectories.  At this point, only information that is accessible from
+    file names and paths only is available.
+
+    set_installed_file_list() can be called to set the list of installed
+    files.
+
+    load_xml() can be called to load information from Doxygen XML data in
+    the build tree (the Doxygen XML data must have been built separately).
+    """
+
+    def __init__(self, source_root, build_root, reporter):
+        """Initialize the tree object by walking the source tree."""
+        self._source_root = os.path.abspath(source_root)
+        self._build_root = os.path.abspath(build_root)
+        self._reporter = reporter
+        self._docset = None
+        self._docmap = dict()
+        self._dirs = dict()
+        self._files = dict()
+        self._modules = dict()
+        self._classes = set()
+        self._walk_dir(os.path.join(self._source_root, 'src'))
+        rootdir = self._get_dir(os.path.join('src', 'gromacs'))
+        for subdir in rootdir.get_subdirectories():
+            self._create_module(subdir)
+        rootdir = self._get_dir(os.path.join('src', 'testutils'))
+        self._create_module(rootdir)
+
+    def _get_rel_path(self, path):
+        assert os.path.isabs(path)
+        if path.startswith(self._build_root):
+            return path[len(self._build_root)+1:]
+        if path.startswith(self._source_root):
+            return path[len(self._source_root)+1:]
+        raise ValueError("path not under build nor source tree: {0}".format(path))
+
+    def _walk_dir(self, rootpath):
+        """Construct representation of the source tree by walking the file system."""
+        assert os.path.isabs(rootpath)
+        assert rootpath not in self._dirs
+        relpath = self._get_rel_path(rootpath)
+        self._dirs[relpath] = Directory(rootpath, None)
+        for dirpath, dirnames, filenames in os.walk(rootpath):
+            if 'contrib' in dirnames:
+                dirnames.remove('contrib')
+            if 'refdata' in dirnames:
+                dirnames.remove('refdata')
+            currentdir = self._dirs[self._get_rel_path(dirpath)]
+            # Loop through a copy so that we can modify dirnames.
+            for dirname in list(dirnames):
+                fullpath = os.path.join(dirpath, dirname)
+                if fullpath == self._build_root:
+                    dirnames.remove(dirname)
+                    continue
+                relpath = self._get_rel_path(fullpath)
+                self._dirs[relpath] = Directory(fullpath, currentdir)
+            extensions = ('.h', '.cuh', '.hpp', '.c', '.cc', '.cpp', '.cu')
+            for filename in filenames:
+                basename, extension = os.path.splitext(filename)
+                if extension in extensions:
+                    fullpath = os.path.join(dirpath, filename)
+                    relpath = self._get_rel_path(fullpath)
+                    self._files[relpath] = File(fullpath, currentdir)
+                elif extension == '.cmakein':
+                    extension = os.path.splitext(basename)[1]
+                    if extension in extensions:
+                        fullpath = os.path.join(dirpath, basename)
+                        relpath = self._get_rel_path(fullpath)
+                        fullpath = os.path.join(dirpath, filename)
+                        self._files[relpath] = GeneratedFile(fullpath, currentdir)
+
+    def _create_module(self, rootdir):
+        """Create module for a subdirectory."""
+        name = 'module_' + rootdir.get_name()
+        moduleobj = Module(name, rootdir)
+        rootdir.set_module(moduleobj)
+        self._modules[name] = moduleobj
+
+    def load_xml(self):
+        """Load Doxygen XML information."""
+        xmldir = os.path.join(self._build_root, 'doxygen', 'xml')
+        self._docset = xml.DocumentationSet(xmldir, self._reporter)
+        self._docset.load_details()
+        self._docset.merge_duplicates()
+        self._load_dirs()
+        self._load_modules()
+        self._load_files()
+        self._load_classes()
+
+    def _load_dirs(self):
+        """Load Doxygen XML directory information."""
+        rootdirs = self._docset.get_compounds(xml.Directory,
+                lambda x: x.get_parent() is None)
+        for dirdoc in rootdirs:
+            self._load_dir(dirdoc, None)
+
+    def _load_dir(self, dirdoc, parent):
+        """Load Doxygen XML directory information for a single directory."""
+        path = dirdoc.get_path().rstrip('/')
+        if not os.path.isabs(path):
+            self._reporter.xml_assert(dirdoc.get_xml_path(),
+                    "expected absolute path in Doxygen-produced XML file")
+            return
+        relpath = self._get_rel_path(path)
+        dirobj = self._dirs.get(relpath)
+        if not dirobj:
+            dirobj = Directory(path, parent)
+            self._dirs[relpath] = dirobj
+        dirobj.set_doc_xml(dirdoc, self)
+        self._docmap[dirdoc] = dirobj
+        for subdirdoc in dirdoc.get_subdirectories():
+            self._load_dir(subdirdoc, dirobj)
+
+    def _load_modules(self):
+        """Load Doxygen XML module (group) information."""
+        moduledocs = self._docset.get_compounds(xml.Group,
+                lambda x: x.get_name().startswith('module_'))
+        for moduledoc in moduledocs:
+            moduleobj = self._modules.get(moduledoc.get_name())
+            if not moduleobj:
+                self._reporter.input_error(
+                        "no matching directory for module: {0}".format(moduledoc))
+                continue
+            moduleobj.set_doc_xml(moduledoc, self)
+            self._docmap[moduledoc] = moduleobj
+
+    def _load_files(self):
+        """Load Doxygen XML file information."""
+        for filedoc in self._docset.get_files():
+            path = filedoc.get_path()
+            if not os.path.isabs(path):
+                self._reporter.xml_assert(filedoc.get_xml_path(),
+                        "expected absolute path in Doxygen-produced XML file")
+                continue
+            extension = os.path.splitext(filedoc.get_path())[1]
+            # We don't care about Markdown files that only produce pages
+            # (and fail the directory check below).
+            if extension == '.md':
+                continue
+            dirdoc = filedoc.get_directory()
+            if not dirdoc:
+                self._reporter.xml_assert(filedoc.get_xml_path(),
+                        "file is not in any directory in Doxygen")
+                continue
+            relpath = self._get_rel_path(path)
+            fileobj = self._files.get(relpath)
+            if not fileobj:
+                fileobj = File(path, self._docmap[dirdoc])
+                self._files[relpath] = fileobj
+            fileobj.set_doc_xml(filedoc, self)
+            self._docmap[filedoc] = fileobj
+
+    def _load_classes(self):
+        """Load Doxygen XML class information."""
+        classdocs = self._docset.get_classes()
+        for classdoc in classdocs:
+            files = [self._docmap[filedoc] for filedoc in classdoc.get_files()]
+            classobj = Class(classdoc, files)
+            self._docmap[classdoc] = classobj
+            self._classes.add(classobj)
+
+    def _get_dir(self, relpath):
+        """Get directory object for a path relative to source tree root."""
+        return self._dirs.get(relpath)
+
+    def set_installed_file_list(self, installedfiles):
+        """Set list of installed files."""
+        for path in installedfiles:
+            if not os.path.isabs(path):
+                self._reporter.input_error(
+                        "installed file not specified with absolute path: {0}"
+                        .format(path))
+                continue
+            relpath = self._get_rel_path(path)
+            if relpath not in self._files:
+                self._reporter.input_error(
+                        "installed file not in source tree: {0}".format(path))
+                continue
+            self._files[relpath].set_installed()
+
+    def get_object(self, docobj):
+        """Get tree object for a Doxygen XML object."""
+        return self._docmap.get(docobj)
+
+    def get_files(self):
+        """Get iterable for all files in the source tree."""
+        return self._files.itervalues()
+
+    def get_classes(self):
+        """Get iterable for all classes in the source tree."""
+        return self._classes
+
+    def get_members(self):
+        """Get iterable for all members (in Doxygen terms) in the source tree."""
+        # TODO: Add wrappers to solve some issues.
+        return self._docset.get_members()
diff --git a/doxygen/reporter.py b/doxygen/reporter.py
new file mode 100644 (file)
index 0000000..c486feb
--- /dev/null
@@ -0,0 +1,246 @@
+#!/usr/bin/python
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2014, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+import sys
+
+from fnmatch import fnmatch
+
+"""Central issue reporting implementation.
+
+This module implements a Reporter class that is used by other Python modules in
+this directory to report issues.  This allows central customization of the
+output format, and also a central implementation for redirecting/copying
+the output into a log file.  This class also implements sorting for the
+messages such that all issues from a single file are reported next to each
+other in the output, as well as filtering to make it possible to suppress
+certain messages.
+"""
+
+class Location(object):
+
+    """Location for a reported message."""
+
+    def __init__(self, filename, line):
+        """Create a location with the given file and line number.
+
+        One or both of the parameters can be None, but filename should be
+        specified if line is.
+        """
+        self.filename = filename
+        self.line = line
+
+    def __nonzero__(self):
+        """Make empty locations False in boolean context."""
+        return self.filename is not None
+
+    def __str__(self):
+        """Format the location as a string."""
+        if self.line:
+            return '{0}:{1}'.format(self.filename, self.line)
+        elif self.filename:
+            return self.filename
+        else:
+            return '<unknown>'
+
+    def __cmp__(self, other):
+        """Sort locations based on file name and line number."""
+        result = cmp(self.filename, other.filename)
+        if not self.filename or result != 0:
+            return result
+        return cmp(self.line, other.line)
+
+class Message(object):
+
+    """Single reported message.
+
+    This class stores the contents of a reporter message for later output to
+    allow sorting the output messages reasonably by the reported location.
+    """
+
+    def __init__(self, message, details=None, filename=None, location=None):
+        """Create a message object.
+
+        The message parameter provides the actual text, while optional details
+        provides a list of extra lines that provide context information for the
+        error.  filename and location provide two alternative ways of
+        specifying the location of the issue:
+         - if filename is provided, the issue is reported in that file, without
+           a line number
+         - if location is provided, it should be a Location instance
+        """
+        if filename:
+            self.location = Location(filename, None)
+        elif location:
+            self.location = location
+        else:
+            self.location = Location(None, None)
+        self.message = message
+        self.details = details
+
+    def __cmp__(self, other):
+        """Sort messages based on file name and line number."""
+        return cmp(self.location, other.location)
+
+class Filter(object):
+
+    """Filter expression to exclude messages."""
+
+    def __init__(self, filterline):
+        """Initialize a filter from a line in a filter file."""
+        self._orgline = filterline
+        filepattern, text = filterline.split(':', 1)
+        if filepattern == '*':
+            self._filematcher = lambda x: x is not None
+        elif filepattern:
+            self._filematcher = lambda x: x and fnmatch(x, '*/' + filepattern)
+        else:
+            self._filematcher = lambda x: x is None
+        self._textpattern = text.strip()
+        self._count = 0
+
+    def matches(self, message):
+        """Check whether the filter matches a message."""
+        if not self._filematcher(message.location.filename):
+            return False
+        if not fnmatch(message.message, self._textpattern):
+            return False
+        self._count += 1
+        return True
+
+    def get_match_count(self):
+        """Return the number of times this filter has matched."""
+        return self._count
+
+    def get_text(self):
+        """Return original line used to specify the filter."""
+        return self._orgline
+
+class Reporter(object):
+
+    """Collect and write out issues found by checker scripts."""
+
+    def __init__(self, logfile=None):
+        """Initialize the reporter.
+
+        If logfile is set to a file name, all issues will be written to this
+        file in addition to stderr.
+        """
+        self._logfp = None
+        if logfile:
+            self._logfp = open(logfile, 'w')
+        self._messages = []
+        self._filters = []
+
+    def _write(self, message):
+        """Implement actual message writing."""
+        wholemsg = ''
+        if message.location:
+            wholemsg += str(message.location) + ': '
+        wholemsg += message.message
+        if message.details:
+            wholemsg += '\n    ' + '\n    '.join(message.details)
+        wholemsg += '\n'
+        sys.stderr.write(wholemsg)
+        if self._logfp:
+            self._logfp.write(wholemsg)
+
+    def _report(self, message):
+        """Handle a single reporter message."""
+        for filterobj in self._filters:
+            if filterobj.matches(message):
+                return
+        if not message.location:
+            self._write(message)
+        else:
+            self._messages.append(message)
+
+    def load_filters(self, filterfile):
+        """Load filters for excluding messages from a file."""
+        with open(filterfile, 'r') as fp:
+            for filterline in fp:
+                filterline = filterline.strip()
+                if not filterline or filterline.startswith('#'):
+                    continue
+                self._filters.append(Filter(filterline))
+
+    def write_pending(self):
+        """Write out pending messages in sorted order."""
+        self._messages.sort()
+        for message in self._messages:
+            self._write(message)
+        self._messages = []
+
+    def report_unused_filters(self):
+        """Report filters that did not match any messages."""
+        for filterobj in self._filters:
+            if filterobj.get_match_count() == 0:
+                # TODO: Consider adding the input filter file as location
+                text = 'warning: unused filter: ' + filterobj.get_text()
+                self._write(Message(text))
+
+    def close_log(self):
+        """Close the log file if one exists."""
+        assert not self._messages
+        if self._logfp:
+            self._logfp.close()
+            self._logfp = None
+
+    def xml_assert(self, xmlpath, message):
+        """Report issues in Doxygen XML that violate assumptions in the script."""
+        self._report(Message('warning: ' + message, filename=xmlpath))
+
+    def input_error(self, message):
+        """Report issues in input files."""
+        self._report(Message('error: ' + message))
+
+    def file_error(self, fileobj, message):
+        """Report file-level issues."""
+        self._report(Message('error: ' + message,
+            location=fileobj.get_reporter_location()))
+
+    def code_issue(self, entity, message, details=None):
+        """Report an issue in a code construct (not documentation related)."""
+        self._report(Message('warning: ' + message, details,
+            location=entity.get_reporter_location()))
+
+    def doc_error(self, entity, message):
+        """Report an issue in documentation."""
+        self._report(Message('error: ' + entity.get_name() + ': ' + message,
+            location=entity.get_reporter_location()))
+
+    def doc_note(self, entity, message):
+        """Report a potential issue in documentation."""
+        self._report(Message('note: ' + entity.get_name() + ': ' + message,
+            location=entity.get_reporter_location()))
diff --git a/doxygen/suppressions.txt b/doxygen/suppressions.txt
new file mode 100644 (file)
index 0000000..f4946e5
--- /dev/null
@@ -0,0 +1,15 @@
+# These look like bugs in Doxygen 1.8.5
+src/gromacs/gmxlib/gmx_cpuid.c: warning: duplicate declarations for a member 'gmx_cpuid_vendor'
+src/gromacs/gmxlib/gmx_cpuid.c: warning: duplicate declarations for a member 'gmx_cpuid_x86_smt'
+src/gromacs/gmxlib/gmx_cpuid.c: warning: duplicate declarations for a member 'gmx_cpuid_simd_suggest'
+
+# The script is currently a bit too eager
+share/template/template.cpp: error: source file documentation appears outside full documentation
+
+# This module name doesn't really fall into any currently used pattern; needs some thought
+: error: no matching directory for module: module_mdrun_integration_tests
+
+# These are real documentation issues that should be fixed
+src/gromacs/imd/imd.c: error: source file documentation appears outside full documentation
+src/gromacs/imd/imdsocket.c: error: source file documentation appears outside full documentation
+src/gromacs/imd/imd.h: note: init_IMD: has in-body comments, which are ignored