From 3e079803eb7a3a03b64fa0e67f1b29f44b9502db Mon Sep 17 00:00:00 2001
From: Teemu Murtola <teemu.murtola@gmail.com>
Date: Fri, 12 Sep 2014 07:28:48 +0300
Subject: [PATCH] Run include order check in doc-check

Now the doc-check target also checks that all files conform to the
include ordering produced by the include sorter.

Add support into the include sorter for only checking the ordering
without changing anything, and partially improve things such that the
full contents of the file are no longer required for some parts of the
checking.  There seems to be no performance impact for now from storing
all the file contents in memory, so did not go through this, but the
partial changes also improve readability of the code.
Add support to gmxtree for loading the git attributes, to know which
files not to check for include ordering.

Change-Id: I919850dab2dfa742f9fb5b216cc163bc118082cc
---
 docs/doxygen/doxygen-check.py | 41 ++++++++++++++++++++++-------------
 docs/doxygen/gmxtree.py       | 41 +++++++++++++++++++++++++++++++----
 docs/doxygen/includesorter.py | 27 +++++++++++++++++------
 3 files changed, 83 insertions(+), 26 deletions(-)

diff --git a/docs/doxygen/doxygen-check.py b/docs/doxygen/doxygen-check.py
index 773dbfbefd..899b19589b 100755
--- a/docs/doxygen/doxygen-check.py
+++ b/docs/doxygen/doxygen-check.py
@@ -57,6 +57,7 @@ from optparse import OptionParser
 
 import gmxtree
 from gmxtree import GromacsTree, DocType
+from includesorter import IncludeSorter
 from reporter import Reporter
 
 def check_file(fileobj, reporter):
@@ -336,6 +337,27 @@ class ModuleDependencyGraph(object):
         summary = 'module-level cyclic dependency: ' + modulelist
         reporter.cyclic_issue(summary)
 
+def check_all(tree, reporter, check_ignored):
+    """Do all checks for the GROMACS tree."""
+    includesorter = IncludeSorter()
+    for fileobj in tree.get_files():
+        if isinstance(fileobj, gmxtree.GeneratorSourceFile):
+            continue
+        check_file(fileobj, reporter)
+        for includedfile in fileobj.get_includes():
+            check_include(fileobj, includedfile, reporter)
+        if fileobj.should_includes_be_sorted() \
+                and not includesorter.check_sorted(fileobj):
+            reporter.code_issue(fileobj, "include order is not consistent")
+
+    for classobj in tree.get_classes():
+        check_class(classobj, reporter)
+
+    for memberobj in tree.get_members():
+        check_member(memberobj, reporter, check_ignored)
+
+    check_cycles(ModuleDependencyGraph(tree), reporter)
+
 def main():
     """Run the checking script."""
     parser = OptionParser()
@@ -364,10 +386,12 @@ def main():
     if not options.quiet:
         sys.stderr.write('Scanning source tree...\n')
     tree = GromacsTree(options.source_root, options.build_root, reporter)
+    tree.load_git_attributes()
     tree.load_installed_file_list()
     if not options.quiet:
         sys.stderr.write('Reading source files...\n')
-    tree.scan_files()
+    # TODO: The checking should be possible without storing everything in memory
+    tree.scan_files(keep_contents=True)
     if options.ignore_cycles:
         tree.load_cycle_suppression_list(options.ignore_cycles)
     if not options.quiet:
@@ -379,20 +403,7 @@ def main():
     if not options.quiet:
         sys.stderr.write('Checking...\n')
 
-    for fileobj in tree.get_files():
-        if isinstance(fileobj, gmxtree.GeneratorSourceFile):
-            continue
-        check_file(fileobj, reporter)
-        for includedfile in fileobj.get_includes():
-            check_include(fileobj, includedfile, reporter)
-
-    for classobj in tree.get_classes():
-        check_class(classobj, reporter)
-
-    for memberobj in tree.get_members():
-        check_member(memberobj, reporter, options.check_ignored)
-
-    check_cycles(ModuleDependencyGraph(tree), reporter)
+    check_all(tree, reporter, options.check_ignored)
 
     reporter.write_pending()
     reporter.report_unused_filters()
diff --git a/docs/doxygen/gmxtree.py b/docs/doxygen/gmxtree.py
index 1c28e6a3e4..67240f391f 100644
--- a/docs/doxygen/gmxtree.py
+++ b/docs/doxygen/gmxtree.py
@@ -54,6 +54,7 @@ import collections
 import os
 import os.path
 import re
+import subprocess
 
 import doxygenxml as xml
 import reporter
@@ -76,7 +77,7 @@ class IncludedFile(object):
 
     """Information about an #include directive in a file."""
 
-    def __init__(self, including_file, lineno, included_file, included_path, is_relative, is_system):
+    def __init__(self, including_file, lineno, included_file, included_path, is_relative, is_system, line):
         self._including_file = including_file
         self._line_number = lineno
         self._included_file = included_file
@@ -84,6 +85,7 @@ class IncludedFile(object):
         #self._used_include_path = used_include_path
         self._is_relative = is_relative
         self._is_system = is_system
+        self._line = line
 
     def __str__(self):
         if self._is_system:
@@ -109,6 +111,12 @@ class IncludedFile(object):
     def get_line_number(self):
         return self._line_number
 
+    def get_full_line(self):
+        """Return the full source line on which this include appears.
+
+        Trailing newline is included."""
+        return self._line
+
     def get_reporter_location(self):
         return reporter.Location(self._including_file.get_abspath(), self._line_number)
 
@@ -154,6 +162,7 @@ class File(object):
         self._include_blocks = []
         self._main_header = None
         self._lines = None
+        self._filter = None
         directory.add_file(self)
 
     def set_doc_xml(self, rawdoc, sourcetree):
@@ -173,12 +182,16 @@ class File(object):
         """Mark the file installed."""
         self._installed = True
 
+    def set_git_filter_attribute(self, filtername):
+        """Set the git filter attribute associated with the file."""
+        self._filter = filtername
+
     def set_main_header(self, included_file):
         """Set the main header file for a source file."""
         assert self.is_source_file()
         self._main_header = included_file
 
-    def _process_include(self, lineno, is_system, includedpath, sourcetree):
+    def _process_include(self, lineno, is_system, includedpath, line, sourcetree):
         """Process #include directive during scan()."""
         is_relative = False
         if is_system:
@@ -192,7 +205,7 @@ class File(object):
             else:
                 fileobj = sourcetree.find_include_file(includedpath)
         included_file = IncludedFile(self, lineno, fileobj, includedpath,
-            is_relative, is_system)
+            is_relative, is_system, line)
         self._includes.append(included_file)
         return included_file
 
@@ -211,7 +224,7 @@ class File(object):
                     is_system = (match.group('quote') == '<')
                     includedpath = match.group('path')
                     included_file = self._process_include(lineno, is_system,
-                            includedpath, sourcetree)
+                            includedpath, line, sourcetree)
                     if current_block is None:
                         current_block = IncludeBlock(included_file)
                         self._include_blocks.append(current_block)
@@ -237,6 +250,10 @@ class File(object):
     def is_test_file(self):
         return self._dir.is_test_directory()
 
+    def should_includes_be_sorted(self):
+        """Return whether the include directives in the file should be sorted."""
+        return self._filter in ('includesort', 'uncrustify')
+
     def is_documented(self):
         return self._rawdoc and self._rawdoc.is_documented()
 
@@ -602,6 +619,9 @@ class GromacsTree(object):
     subdirectories.  At this point, only information that is accessible from
     file names and paths only is available.
 
+    load_git_attributes() can be called to load attribute information from
+    .gitattributes for all the files.
+
     load_installed_file_list() can be called to load the list of installed
     files from the build tree (generated by the find-installed-headers target).
 
@@ -872,6 +892,19 @@ class GromacsTree(object):
             if testpath in self._files:
                 return self._files[testpath]
 
+    def load_git_attributes(self):
+        """Load git attribute information for files."""
+        args = ['git', 'check-attr', '--stdin', 'filter']
+        git_check_attr = subprocess.Popen(args, stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE, cwd=self._source_root)
+        filelist = '\n'.join(map(File.get_relpath, self._files.itervalues()))
+        filters = git_check_attr.communicate(filelist)[0]
+        for fileinfo in filters.splitlines():
+            path, dummy, value = fileinfo.split(': ')
+            fileobj = self._files.get(path)
+            assert fileobj is not None
+            fileobj.set_git_filter_attribute(value)
+
     def load_installed_file_list(self):
         """Load list of installed files from the build tree."""
         listpath = os.path.join(self._build_root, 'docs', 'doxygen', 'installed-headers.txt')
diff --git a/docs/doxygen/includesorter.py b/docs/doxygen/includesorter.py
index 23c2ecf0c6..635bf53256 100755
--- a/docs/doxygen/includesorter.py
+++ b/docs/doxygen/includesorter.py
@@ -40,8 +40,10 @@ It allows (in most cases) automatically sorting includes and formatting
 the paths to use either relative paths or paths relative to src/.
 It groups includes in groups of related headers, sorts the headers
 alphabetically within each block, and inserts empty lines in between.
-The script requires an up-to-date list of installed headers and Doxygen XML
-documentation to be present in the build tree.
+It can be run as a standalone script, in which case it requires an up-to-date
+list of installed headers and Doxygen XML documentation to be present in the
+build tree.  It can also be imported as a module to be embedded in other
+scripts.  In the latter case, the IncludeSorter provides the main interface.
 
 The sorting assumes some conventions (e.g., that system headers are included
 with angle brackets instead of quotes).  Generally, these conventions are
@@ -108,7 +110,7 @@ class GroupedSorter(object):
             'regex', 'set', 'sstream', 'stdexcept', 'streambuf', 'string', 'strstream',
             'typeinfo', 'vector', 'utility']
 
-    def __init__(self, style, absolute):
+    def __init__(self, style='pub-priv', absolute=False):
         """Initialize a sorted with the given style."""
         if style == 'single-group':
             self._local_group = 'none'
@@ -210,7 +212,7 @@ class GroupedSorter(object):
             path = self._get_path(included_file, group, including_file)
         return (group, os.path.split(path), include)
 
-    def format_include(self, obj, prev, lines):
+    def format_include(self, obj, prev):
         """Format an #include directive after sorting."""
         result = []
         if prev:
@@ -221,7 +223,7 @@ class GroupedSorter(object):
                 # Skip duplicates
                 return result
         include = obj[2]
-        line = lines[include.get_line_number()-1]
+        line = include.get_full_line()
         include_re = r'^(?P<head>\s*#\s*include\s+)["<][^">]*[">](?P<tail>.*)$'
         match = re.match(include_re, line)
         assert match
@@ -241,8 +243,10 @@ class IncludeSorter(object):
     (see GroupedSorter) to keep things separated.
     """
 
-    def __init__(self, sortmethod, quiet):
+    def __init__(self, sortmethod=None, quiet=True):
         """Initialize the include sorter with the given sorter and options."""
+        if not sortmethod:
+            sortmethod = GroupedSorter()
         self._sortmethod = sortmethod
         self._quiet = quiet
         self._changed = False
@@ -259,7 +263,7 @@ class IncludeSorter(object):
         prev = None
         current_line_number = block.get_first_line()-1
         for include in includes:
-            newlines = self._sortmethod.format_include(include, prev, lines)
+            newlines = self._sortmethod.format_include(include, prev)
             result.extend(newlines)
             if not self._changed:
                 for offset, newline in enumerate(newlines):
@@ -292,6 +296,15 @@ class IncludeSorter(object):
             with open(fileobj.get_abspath(), 'w') as fp:
                 fp.write(''.join(newlines))
 
+    def check_sorted(self, fileobj):
+        """Check that includes within a file are sorted."""
+        # TODO: Make the checking work without full contents of the file
+        lines = fileobj.get_contents()
+        self._changed = False
+        for block in fileobj.get_include_blocks():
+            self._sort_include_block(block, lines)
+        return not self._changed
+
 def main():
     """Run the include sorter script."""
     import os
-- 
2.22.0