Run include order check in doc-check
[alexxy/gromacs.git] / docs / doxygen / doxygen-check.py
index f292b85abedb06194a0934b8aee968dd7d9b491b..899b19589b778eea2e2c3d12033a9058dbce3286 100755 (executable)
@@ -55,11 +55,24 @@ them to the script.
 import sys
 from optparse import OptionParser
 
+import gmxtree
 from gmxtree import GromacsTree, DocType
+from includesorter import IncludeSorter
 from reporter import Reporter
 
 def check_file(fileobj, reporter):
-    """Check file-level documentation."""
+    """Check file-level issues."""
+    if fileobj.is_source_file() and not fileobj.is_external() and \
+            fileobj.get_relpath().startswith('src/'):
+        includes = fileobj.get_includes()
+        if includes:
+            firstinclude = includes[0].get_file()
+            if not firstinclude or firstinclude.get_name() != "gmxpre.h":
+                reporter.code_issue(includes[0],
+                                    "does not include \"gmxpre.h\" first")
+        else:
+            reporter.code_issue(fileobj, "does not include \"gmxpre.h\"")
+
     if not fileobj.is_documented():
         # TODO: Add rules for required documentation
         return
@@ -68,7 +81,7 @@ def check_file(fileobj, reporter):
         # TODO: Add rule to exclude examples from this check
         if fileobj.is_installed():
             reporter.file_error(fileobj, "source file is installed")
-        if fileobj.get_documentation_type() != DocType.internal:
+        if fileobj.get_doc_type() != DocType.internal:
             reporter.file_error(fileobj,
                     "source file documentation appears outside full documentation")
         elif fileobj.get_api_type() != DocType.internal:
@@ -76,19 +89,19 @@ def check_file(fileobj, reporter):
     elif fileobj.is_test_file() and fileobj.is_installed():
         reporter.file_error(fileobj, "test file is installed")
     elif fileobj.is_installed():
-        if fileobj.get_documentation_type() != DocType.public:
+        if fileobj.get_doc_type() != DocType.public:
             reporter.file_error(fileobj,
                     "public header has non-public documentation")
-    elif fileobj.get_documentation_type() == DocType.public:
+    elif fileobj.get_doc_type() == DocType.public:
         reporter.file_error(fileobj,
                 "non-installed header has public documentation")
     elif fileobj.get_api_type() == DocType.public:
         reporter.file_error(fileobj,
                 "non-installed header specified as part of public API")
-    elif fileobj.get_documentation_type() < fileobj.get_api_type():
+    elif fileobj.get_doc_type() < fileobj.get_api_type():
         reporter.file_error(fileobj,
                 "API type ({0}) conflicts with documentation visibility ({1})"
-                .format(fileobj.get_api_type(), fileobj.get_documentation_type()))
+                .format(fileobj.get_api_type(), fileobj.get_doc_type()))
 
     if not fileobj.has_brief_description():
         reporter.file_error(fileobj,
@@ -110,40 +123,41 @@ def check_file(fileobj, reporter):
 
 def check_include(fileobj, includedfile, reporter):
     """Check an #include directive."""
+    otherfile = includedfile.get_file()
     if includedfile.is_system():
-        if includedfile.get_file():
-            reporter.code_issue(includedfile,
-                    "includes local file as {0}".format(includedfile))
-    else:
-        otherfile = includedfile.get_file()
-        if not otherfile:
-            reporter.code_issue(includedfile,
-                    "includes non-local file as {0}".format(includedfile))
-        elif fileobj.is_installed() and not includedfile.is_relative():
-            reporter.code_issue(includedfile,
-                    "installed header includes {0} using non-relative path"
-                    .format(includedfile))
         if not otherfile:
             return
-        if fileobj.is_installed() and not otherfile.is_installed():
-            reporter.code_issue(includedfile,
-                    "installed header includes non-installed {0}"
-                    .format(includedfile))
-        filemodule = fileobj.get_module()
-        othermodule = otherfile.get_module()
-        if fileobj.is_documented() and otherfile.is_documented():
-            filetype = fileobj.get_documentation_type()
-            othertype = otherfile.get_documentation_type()
-            if filetype > othertype:
-                reporter.code_issue(includedfile,
-                        "{0} file includes {1} file {2}"
-                        .format(filetype, othertype, includedfile))
-        check_api = (othermodule and othermodule.is_documented() and
-                filemodule != othermodule)
-        if check_api and otherfile.get_api_type() < DocType.library:
+        reporter.code_issue(includedfile,
+                "includes local file as {0}".format(includedfile))
+    if not otherfile:
+        reporter.code_issue(includedfile,
+                "includes non-local file as {0}".format(includedfile))
+    # TODO: Reinstate this check once there is clarity on what we want
+    # to enforce.
+    #elif fileobj.is_installed() and not includedfile.is_relative():
+    #    reporter.code_issue(includedfile,
+    #            "installed header includes {0} using non-relative path"
+    #            .format(includedfile))
+    if not otherfile:
+        return
+    if fileobj.is_installed() and not otherfile.is_installed():
+        reporter.code_issue(includedfile,
+                "installed header includes non-installed {0}"
+                .format(includedfile))
+    filemodule = fileobj.get_module()
+    othermodule = otherfile.get_module()
+    if fileobj.is_documented() and otherfile.is_documented():
+        filetype = fileobj.get_doc_type()
+        othertype = otherfile.get_doc_type()
+        if filetype > othertype:
             reporter.code_issue(includedfile,
-                    "included file {0} is not documented as exposed outside its module"
-                    .format(includedfile))
+                    "{0} file includes {1} file {2}"
+                    .format(filetype, othertype, includedfile))
+    check_api = (otherfile.api_type_is_reliable() and filemodule != othermodule)
+    if check_api and otherfile.get_api_type() < DocType.library:
+        reporter.code_issue(includedfile,
+                "included file {0} is not documented as exposed outside its module"
+                .format(includedfile))
 
 def check_entity(entity, reporter):
     """Check documentation for a code construct."""
@@ -156,8 +170,8 @@ def check_class(classobj, reporter):
     """Check documentation for a class/struct/union."""
     check_entity(classobj, reporter)
     if classobj.is_documented():
-        classtype = classobj.get_documentation_type()
-        filetype = classobj.get_file_documentation_type()
+        classtype = classobj.get_doc_type()
+        filetype = classobj.get_file_doc_type()
         if classtype == DocType.public and not classobj.is_in_installed_file():
             reporter.doc_error(classobj,
                     "has public documentation, but is not in installed header")
@@ -166,17 +180,184 @@ def check_class(classobj, reporter):
                     "is in {0} file(s), but appears in {1} documentation"
                     .format(filetype, classtype))
 
-def check_member(member, reporter):
+def check_member(member, reporter, check_ignored):
     """Check documentation for a generic member."""
     check_entity(member, reporter)
     if member.is_documented():
-        if not member.is_visible():
-            # TODO: This is triggered by members in anonymous namespaces.
+        if check_ignored and not member.is_visible():
             reporter.doc_note(member,
                     "is documented, but is ignored by Doxygen, because its scope is not documented")
         if member.has_inbody_description():
             reporter.doc_note(member, "has in-body comments, which are ignored")
 
+def check_cycles(graph, reporter):
+    """Check cyclic dependencies in a dependency graph.
+
+    The graph parameter provides the graph to check.  It should be an object
+    that has three methods:
+      iternodes():
+        Return the list of nodes in the graph.
+      iteredges(node):
+        Return the list of edges from a given node.
+        The list should contain (node, edge) pairs, where node is an object
+        returned by iternodes() and edge is any object.
+      report_cycle(cycle, reporter):
+        Process a found cycle. cycle contains a list of (node, edge) pairs
+        that describe the cycle.  edge is the edge object that leads _to_
+        the node in the cycle.
+
+    This is implemented using an extended DFS-based strongly connected
+    component (SCC) search, written using a stack instead of recursion.
+    The base algorithm is Tarjan's SCC search:
+      http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm
+
+    Each back edge that is encountered during the search is reported as a
+    cycle.  Additionally, if a cross edge is encountered that is within the
+    current SCC, the target node and all its children in the current SCC will
+    be visited again to find all cycles.  All steps except cycle detection are
+    omitted for such re-traversal.
+
+    To avoid duplicates from cycles that do not include all nodes in an SCC,
+    a cycle is only reported if the target of the back edge is still active
+    in the search, i.e., all edges from it have not yet been traversed.
+    """
+    # The DFS stack; next node is always popped from the end.
+    # Stores (node, edge) pairs.
+    # edge is None for start nodes and for post-order processing.
+    dfsstack = []
+    for node in graph.iternodes():
+        dfsstack.append((node, None))
+    # Stack of visited nodes that have not yet been assigned to a strongly
+    # connected component.
+    visitstack = []
+    # List of nodes in the DFS recursion stack.
+    currlist = []
+    # Set of nodes in currlist for more efficient searching.
+    currset = set()
+    # Counter for initializing preorder.
+    visit_count = 0
+    # DFS pre-order for nodes: initialized when a node is first encountered
+    # in the search.
+    preorder = dict()
+    # Lowest pre-order index reachable from this node.
+    # Initialized to pre-order, and updated during post-order processing.
+    linkorder = dict()
+    # Set to True for a node when first encountered, and set to False when
+    # a strongly connected component has been processed.
+    in_progress = dict()
+    # The DFS search
+    while dfsstack:
+        currnode, curredge = dfsstack.pop()
+        # curredge is None if this is a start node or post-order traversal.
+        # currlist is empty if this is a start node.
+        if curredge is None and currlist:
+            # All children visited: post-order processing.
+            done = currlist.pop()[0]
+            assert done == currnode
+            currset.remove(currnode)
+            # If this is the first time this node is encountered, fill
+            # linkorder and check for strongly connected components.
+            if linkorder[currnode] == preorder[currnode]:
+                children = [x for x, dummy in graph.iteredges(currnode) if in_progress[x]]
+                if children:
+                    linkorder[currnode] = min([linkorder[x] for x in children])
+                if preorder[currnode] <= linkorder[currnode]:
+                    # This is a root of a strongly connected component.
+                    while visitstack:
+                        node = visitstack.pop()
+                        in_progress[node] = False
+                        if node == currnode:
+                            break
+                    else:
+                        assert False
+            continue
+        if currnode not in preorder:
+            # First encounter of this node: pre-order processing.
+            preorder[currnode] = visit_count
+            linkorder[currnode] = visit_count
+            visitstack.append(currnode)
+            visit_count += 1
+            in_progress[currnode] = True
+        elif not in_progress[currnode]:
+            # Do not enter processed components again.
+            continue
+        currlist.append((currnode, curredge))
+        currset.add(currnode)
+        # add entry for post-order traversal
+        dfsstack.append((currnode, None))
+        for nextnode, edge in graph.iteredges(currnode):
+            if nextnode not in preorder:
+                # Not seen previously: push
+                dfsstack.append((nextnode, edge))
+            else:
+                # If an already visited node is in the same component, it is
+                # either part of a cycle, or we need to traverse it again to
+                # find all cycles.
+                if in_progress[nextnode]:
+                    if nextnode not in currset:
+                        dfsstack.append((nextnode, edge))
+                    # Only report cycles to nodes that haven't been processed
+                    # yet to avoid duplicates.
+                    elif linkorder[nextnode] == preorder[nextnode]:
+                        for index in xrange(len(currlist)):
+                            if currlist[index][0] == nextnode:
+                                cycle = [(nextnode, edge)]
+                                cycle.extend(currlist[index+1:])
+                                graph.report_cycle(cycle, reporter)
+                                break
+                        else:
+                            assert False
+
+class ModuleDependencyGraph(object):
+
+    """Module dependency graph representation for check_cycles().
+
+    In the reported graph, the nodes are gmxtree.Module objects and the edges
+    are gmxtree.ModuleDependency objects.
+    """
+
+    def __init__(self, tree):
+        self._tree = tree
+
+    def iternodes(self):
+        for module in self._tree.get_modules():
+            if module.get_name() != 'module_testutils':
+                yield module
+
+    def iteredges(self, module):
+        for dependency in module.get_dependencies():
+            if dependency.get_other_module().get_name() != 'module_testutils':
+                yield (dependency.get_other_module(), dependency)
+
+    def report_cycle(self, cycle, reporter):
+        if any([x[1].is_cycle_suppressed() for x in cycle]):
+            # TODO: Report unused suppressions.
+            return
+        modulelist = ' -> '.join([x[0].get_name()[7:] for x in cycle])
+        summary = 'module-level cyclic dependency: ' + modulelist
+        reporter.cyclic_issue(summary)
+
+def check_all(tree, reporter, check_ignored):
+    """Do all checks for the GROMACS tree."""
+    includesorter = IncludeSorter()
+    for fileobj in tree.get_files():
+        if isinstance(fileobj, gmxtree.GeneratorSourceFile):
+            continue
+        check_file(fileobj, reporter)
+        for includedfile in fileobj.get_includes():
+            check_include(fileobj, includedfile, reporter)
+        if fileobj.should_includes_be_sorted() \
+                and not includesorter.check_sorted(fileobj):
+            reporter.code_issue(fileobj, "include order is not consistent")
+
+    for classobj in tree.get_classes():
+        check_class(classobj, reporter)
+
+    for memberobj in tree.get_members():
+        check_member(memberobj, reporter, check_ignored)
+
+    check_cycles(ModuleDependencyGraph(tree), reporter)
+
 def main():
     """Run the checking script."""
     parser = OptionParser()
@@ -184,24 +365,20 @@ def main():
                       help='Source tree root directory')
     parser.add_option('-B', '--build-root',
                       help='Build tree root directory')
-    parser.add_option('--installed',
-                      help='Read list of installed files from given file')
     parser.add_option('-l', '--log',
                       help='Write issues into a given log file in addition to stderr')
     parser.add_option('--ignore',
                       help='Set file with patterns for messages to ignore')
+    parser.add_option('--ignore-cycles',
+                      help='Set file with module dependencies to ignore in cycles')
     parser.add_option('--check-ignored', action='store_true',
-                      help='Check documentation ignored by Doxygen')
+                      help='Issue notes for comments ignored by Doxygen')
     parser.add_option('-q', '--quiet', action='store_true',
                       help='Do not write status messages')
+    parser.add_option('--exitcode', action='store_true',
+                      help='Return non-zero exit code if there are warnings')
     options, args = parser.parse_args()
 
-    installedlist = []
-    if options.installed:
-        with open(options.installed, 'r') as outfile:
-            for line in outfile:
-                installedlist.append(line.strip())
-
     reporter = Reporter(options.log)
     if options.ignore:
         reporter.load_filters(options.ignore)
@@ -209,10 +386,14 @@ def main():
     if not options.quiet:
         sys.stderr.write('Scanning source tree...\n')
     tree = GromacsTree(options.source_root, options.build_root, reporter)
-    tree.set_installed_file_list(installedlist)
+    tree.load_git_attributes()
+    tree.load_installed_file_list()
     if not options.quiet:
         sys.stderr.write('Reading source files...\n')
-    tree.scan_files()
+    # TODO: The checking should be possible without storing everything in memory
+    tree.scan_files(keep_contents=True)
+    if options.ignore_cycles:
+        tree.load_cycle_suppression_list(options.ignore_cycles)
     if not options.quiet:
         sys.stderr.write('Reading Doxygen XML files...\n')
     tree.load_xml()
@@ -222,20 +403,13 @@ def main():
     if not options.quiet:
         sys.stderr.write('Checking...\n')
 
-    for fileobj in tree.get_files():
-        check_file(fileobj, reporter)
-        for includedfile in fileobj.get_includes():
-            check_include(fileobj, includedfile, reporter)
-
-    for classobj in tree.get_classes():
-        check_class(classobj, reporter)
-
-    for memberobj in tree.get_members():
-        if memberobj.is_visible() or options.check_ignored:
-            check_member(memberobj, reporter)
+    check_all(tree, reporter, options.check_ignored)
 
     reporter.write_pending()
     reporter.report_unused_filters()
     reporter.close_log()
 
+    if options.exitcode and reporter.had_warnings():
+        sys.exit(1)
+
 main()