Embed module dependency graph in Doxygen docs
[alexxy/gromacs.git] / docs / doxygen / gmxtree.py
index c974d13a1454d3462cad74e9d46899b86a811270..6359b6391eca4b115a9df8541518e0b56ac6f98a 100644 (file)
@@ -50,9 +50,11 @@ rules that come from GROMACS-specific knowledge.  In the future, more such
 customizations will be added.
 """
 
+import collections
 import os
 import os.path
 import re
+import subprocess
 
 import doxygenxml as xml
 import reporter
@@ -75,14 +77,15 @@ class IncludedFile(object):
 
     """Information about an #include directive in a file."""
 
-    def __init__(self, abspath, lineno, included_file, included_path, is_relative, is_system):
-        self._abspath = abspath
+    def __init__(self, including_file, lineno, included_file, included_path, is_relative, is_system, line):
+        self._including_file = including_file
         self._line_number = lineno
         self._included_file = included_file
         self._included_path = included_path
         #self._used_include_path = used_include_path
         self._is_relative = is_relative
         self._is_system = is_system
+        self._line = line
 
     def __str__(self):
         if self._is_system:
@@ -96,11 +99,49 @@ class IncludedFile(object):
     def is_relative(self):
         return self._is_relative
 
+    def get_included_path(self):
+        return self._included_path
+
+    def get_including_file(self):
+        return self._including_file
+
     def get_file(self):
         return self._included_file
 
+    def get_line_number(self):
+        return self._line_number
+
+    def get_full_line(self):
+        """Return the full source line on which this include appears.
+
+        Trailing newline is included."""
+        return self._line
+
     def get_reporter_location(self):
-        return reporter.Location(self._abspath, self._line_number)
+        return reporter.Location(self._including_file.get_abspath(), self._line_number)
+
+class IncludeBlock(object):
+
+    """Block of consecutive #include directives in a file."""
+
+    def __init__(self, first_included_file):
+        self._first_line = first_included_file.get_line_number()
+        self._last_line = self._first_line  # updated again by add_file() below
+        self._files = []  # IncludedFile objects, in the order they were added
+        self.add_file(first_included_file)
+
+    def add_file(self, included_file):
+        self._files.append(included_file)
+        self._last_line = included_file.get_line_number()  # block now ends here
+
+    def get_includes(self):
+        return self._files
+
+    def get_first_line(self):
+        return self._first_line
+
+    def get_last_line(self):
+        return self._last_line
 
 class File(object):
 
@@ -118,6 +159,10 @@ class File(object):
         self._apitype = DocType.none
         self._modules = set()
         self._includes = []
+        self._include_blocks = []
+        self._main_header = None
+        self._lines = None
+        self._filter = None
         directory.add_file(self)
 
     def set_doc_xml(self, rawdoc, sourcetree):
@@ -137,7 +182,16 @@ class File(object):
         """Mark the file installed."""
         self._installed = True
 
-    def _process_include(self, lineno, is_system, includedpath, sourcetree):
+    def set_git_filter_attribute(self, filtername):
+        """Set the git filter attribute associated with the file."""
+        self._filter = filtername
+
+    def set_main_header(self, included_file):
+        """Set the main header file for a source file."""
+        assert self.is_source_file()
+        self._main_header = included_file
+
+    def _process_include(self, lineno, is_system, includedpath, line, sourcetree):
         """Process #include directive during scan()."""
         is_relative = False
         if is_system:
@@ -150,21 +204,36 @@ class File(object):
                 fileobj = sourcetree.get_file(fullpath)
             else:
                 fileobj = sourcetree.find_include_file(includedpath)
-        self._includes.append(IncludedFile(self.get_abspath(), lineno, fileobj, includedpath,
-                is_relative, is_system))
+        included_file = IncludedFile(self, lineno, fileobj, includedpath,
+            is_relative, is_system, line)
+        self._includes.append(included_file)
+        return included_file
 
-    def scan_contents(self, sourcetree):
+    def scan_contents(self, sourcetree, keep_contents):
         """Scan the file contents and initialize information based on it."""
         # TODO: Consider a more robust regex.
-        include_re = r'^#\s*include\s+(?P<quote>["<])(?P<path>[^">]*)[">]'
+        include_re = r'^\s*#\s*include\s+(?P<quote>["<])(?P<path>[^">]*)[">]'
+        current_block = None
+        # TODO: Consider reading directly into this list, and iterate that.
+        lines = []
         with open(self._abspath, 'r') as scanfile:
             for lineno, line in enumerate(scanfile, 1):
+                lines.append(line)
                 match = re.match(include_re, line)
                 if match:
                     is_system = (match.group('quote') == '<')
                     includedpath = match.group('path')
-                    self._process_include(lineno, is_system, includedpath,
-                            sourcetree)
+                    included_file = self._process_include(lineno, is_system,
+                            includedpath, line, sourcetree)
+                    if current_block is None:
+                        current_block = IncludeBlock(included_file)
+                        self._include_blocks.append(current_block)
+                    else:
+                        current_block.add_file(included_file)
+                elif line and not line.isspace():
+                    current_block = None
+        if keep_contents:
+            self._lines = lines
 
     def get_reporter_location(self):
         return reporter.Location(self._abspath, None)
@@ -181,6 +250,10 @@ class File(object):
     def is_test_file(self):
         return self._dir.is_test_directory()
 
+    def should_includes_be_sorted(self):
+        """Return whether the include directives in the file should be sorted."""
+        return self._filter in ('includesort', 'uncrustify')
+
     def is_documented(self):
         return self._rawdoc and self._rawdoc.is_documented()
 
@@ -196,7 +269,10 @@ class File(object):
     def get_name(self):
         return os.path.basename(self._abspath)
 
-    def get_documentation_type(self):
+    def get_directory(self):
+        return self._dir
+
+    def get_doc_type(self):
         if not self._rawdoc:
             return DocType.none
         return self._rawdoc.get_visibility()
@@ -204,6 +280,22 @@ class File(object):
     def get_api_type(self):
         return self._apitype
 
+    def api_type_is_reliable(self):
+        if self._apitype in (DocType.internal, DocType.library):
+            return True  # these two API types are always trusted
+        module = self.get_module()  # otherwise it depends on the owning module
+        return module and module.is_documented()  # may be a falsy module, not False
+
+    def is_public(self):
+        if self.api_type_is_reliable():  # trust the API type alone
+            return self.get_api_type() == DocType.public
+        return self.get_api_type() == DocType.public or self.is_installed()  # else installed counts too
+
+    def is_module_internal(self):
+        if self.is_source_file():
+            return True  # source files always count as module-internal
+        return not self.is_installed() and self.get_api_type() <= DocType.internal  # headers: both must hold
+
     def get_expected_module(self):
         return self._dir.get_module()
 
@@ -219,7 +311,33 @@ class File(object):
     def get_includes(self):
         return self._includes
 
+    def get_include_blocks(self):
+        return self._include_blocks
+
+    def get_main_header(self):
+        return self._main_header
+
+    def get_contents(self):
+        return self._lines
+
 class GeneratedFile(File):
+    def __init__(self, abspath, relpath, directory):
+        File.__init__(self, abspath, relpath, directory)
+        self._generator_source_file = None
+
+    def scan_contents(self, sourcetree, keep_contents):
+        if os.path.exists(self.get_abspath()):  # silently skip a file that is not on disk
+            File.scan_contents(self, sourcetree, keep_contents)
+
+    def set_generator_source(self, sourcefile):
+        self._generator_source_file = sourcefile
+
+    def get_reporter_location(self):
+        if self._generator_source_file:  # report against the generator input when known
+            return self._generator_source_file.get_reporter_location()
+        return File.get_reporter_location(self)  # fall back to this file's own location
+
+class GeneratorSourceFile(File):
     pass
 
 class Directory(object):
@@ -308,6 +426,46 @@ class Directory(object):
         for fileobj in self._files:
             yield fileobj
 
+    def contains(self, fileobj):
+        """Return True if fileobj is in this directory or any directory below it."""
+        dirobj = fileobj.get_directory()  # start from the file's own directory
+        while dirobj:
+            if dirobj == self:
+                return True
+            dirobj = dirobj._parent  # climb one level; a falsy parent ends the walk
+        return False
+
+class ModuleDependency(object):
+
+    """Dependency on another module, with the #includes that cause it."""
+
+    def __init__(self, othermodule):
+        """Initialize empty dependency object with given module as dependency."""
+        self._othermodule = othermodule
+        self._includedfiles = []  # IncludedFile objects added via add_included_file()
+        self._cyclesuppression = None  # None = not suppressed; True once suppressed
+
+    def add_included_file(self, includedfile):
+        """Add IncludedFile that is part of this dependency."""
+        assert includedfile.get_file().get_module() == self._othermodule
+        self._includedfiles.append(includedfile)
+
+    def set_cycle_suppression(self):
+        """Set suppression on cycles containing this dependency."""
+        self._cyclesuppression = True
+
+    def is_cycle_suppressed(self):
+        """Return whether cycles containing this dependency are suppressed."""
+        return self._cyclesuppression is not None
+
+    def get_other_module(self):
+        """Get module that this dependency is to."""
+        return self._othermodule
+
+    def get_included_files(self):
+        """Get IncludedFile objects for the individual include dependencies."""
+        return self._includedfiles
+
 class Module(object):
 
     """Code module in the GROMACS source tree.
@@ -324,6 +482,7 @@ class Module(object):
         self._rawdoc = None
         self._rootdir = rootdir
         self._group = None
+        self._dependencies = dict()
 
     def set_doc_xml(self, rawdoc, sourcetree):
         """Assiociate Doxygen documentation entity with the module."""
@@ -336,6 +495,13 @@ class Module(object):
                 if groupname.startswith('group_'):
                     self._group = groupname[6:]
 
+    def add_dependency(self, othermodule, includedfile):
+        """Add #include dependency from a file in this module."""
+        assert includedfile.get_file().get_module() == othermodule
+        if othermodule not in self._dependencies:
+            self._dependencies[othermodule] = ModuleDependency(othermodule)
+        self._dependencies[othermodule].add_included_file(includedfile)
+
     def is_documented(self):
         return self._rawdoc is not None
 
@@ -352,6 +518,18 @@ class Module(object):
     def get_group(self):
         return self._group
 
+    def get_dependencies(self):
+        return self._dependencies.itervalues()
+
+class Namespace(object):
+
+    """Namespace in the GROMACS source code, wrapping its raw documentation object."""
+
+    def __init__(self, rawdoc):
+        self._rawdoc = rawdoc  # the only state; every query is delegated to it
+
+    def is_anonymous(self):
+        return self._rawdoc.is_anonymous()
 
 class Class(object):
 
@@ -376,19 +554,66 @@ class Class(object):
     def has_brief_description(self):
         return self._rawdoc.has_brief_description()
 
-    def get_documentation_type(self):
+    def get_doc_type(self):
+        """Return documentation type (visibility) for the class.
+
+        In addition to the actual code, this encodes GROMACS-specific logic
+        of setting EXTRACT_LOCAL_CLASSES=YES only for the full documentation.
+        Local classes never appear outside the full documentation, no matter
+        what is their visibility.
+        """
         if not self.is_documented():
             return DocType.none
         if self._rawdoc.is_local():
             return DocType.internal
         return self._rawdoc.get_visibility()
 
-    def get_file_documentation_type(self):
-        return max([fileobj.get_documentation_type() for fileobj in self._files])
+    def get_file_doc_type(self):
+        return max([fileobj.get_doc_type() for fileobj in self._files])
 
     def is_in_installed_file(self):
         return any([fileobj.is_installed() for fileobj in self._files])
 
+class Member(object):
+
+    """Member (in Doxygen terminology) in the GROMACS source tree.
+
+    Currently, modeling is limited to the minimal set of properties that the
+    checker uses.
+    """
+
+    def __init__(self, rawdoc, namespace):
+        self._rawdoc = rawdoc
+        self._namespace = namespace  # may be None; is_visible() checks for that
+
+    def get_name(self):
+        return self._rawdoc.get_name()
+
+    def get_reporter_location(self):
+        return self._rawdoc.get_reporter_location()
+
+    def is_documented(self):
+        return self._rawdoc.is_documented()
+
+    def has_brief_description(self):
+        return self._rawdoc.has_brief_description()
+
+    def has_inbody_description(self):
+        return self._rawdoc.has_inbody_description()
+
+    def is_visible(self):
+        """Return whether the member is visible in Doxygen documentation.
+
+        Doxygen ignores members whose parent compounds are not documented.
+        However, when EXTRACT_ANON_NSPACES=YES (which is set for our full
+        documentation), members of anonymous namespaces are extracted even if
+        the namespace is the only parent and is not documented.
+        """
+        if self._namespace and self._namespace.is_anonymous():
+            return True
+        return self._rawdoc.get_inherited_visibility() != DocType.none
+
+
 class GromacsTree(object):
 
     """Root object for navigating the GROMACS source tree.
@@ -398,8 +623,11 @@ class GromacsTree(object):
     subdirectories.  At this point, only information that is accessible from
     file names and paths only is available.
 
-    set_installed_file_list() can be called to set the list of installed
-    files.
+    load_git_attributes() can be called to load attribute information from
+    .gitattributes for all the files.
+
+    load_installed_file_list() can be called to load the list of installed
+    files from the build tree (generated by the find-installed-headers target).
 
     scan_files() can be called to read all the files and initialize #include
     dependencies between the files based on the information.  This is done like
@@ -423,7 +651,32 @@ class GromacsTree(object):
         self._files = dict()
         self._modules = dict()
         self._classes = set()
+        self._namespaces = set()
+        self._members = set()
         self._walk_dir(os.path.join(self._source_root, 'src'))
+        for fileobj in self.get_files():
+            if fileobj and fileobj.is_source_file() and not fileobj.is_external():
+                (basedir, name) = os.path.split(fileobj.get_abspath())
+                (basename, ext) = os.path.splitext(name)
+                header = self.get_file(os.path.join(basedir, basename + '.h'))
+                if not header and ext == '.cu':
+                    header = self.get_file(os.path.join(basedir, basename + '.cuh'))
+                if not header and fileobj.is_test_file():
+                    basedir = os.path.dirname(basedir)
+                    header = self.get_file(os.path.join(basedir, basename + '.h'))
+                    if not header:
+                        # Somewhat of a hack; currently, the tests for
+                        # analysisdata/modules/ and trajectoryanalysis/modules/
+                        # is at the top-level tests directory.
+                        # TODO: It could be clearer to split the tests so that
+                        # there would be a separate modules/tests/.
+                        header = self.get_file(os.path.join(basedir, 'modules', basename + '.h'))
+                    if not header and basename.endswith('_tests'):
+                        header = self.get_file(os.path.join(basedir, basename[:-6] + '.h'))
+                if not header and fileobj.get_relpath().startswith('src/gromacs'):
+                    header = self._files.get(os.path.join('src/gromacs/legacyheaders', basename + '.h'))
+                if header:
+                    fileobj.set_main_header(header)
         rootdir = self._get_dir(os.path.join('src', 'gromacs'))
         for subdir in rootdir.get_subdirectories():
             self._create_module(subdir)
@@ -468,10 +721,20 @@ class GromacsTree(object):
                 elif extension == '.cmakein':
                     extension = os.path.splitext(basename)[1]
                     if extension in extensions:
+                        fullpath = os.path.join(dirpath, filename)
+                        relpath = self._get_rel_path(fullpath)
+                        sourcefile = GeneratorSourceFile(fullpath, relpath, currentdir)
+                        self._files[relpath] = sourcefile
                         fullpath = os.path.join(dirpath, basename)
                         relpath = self._get_rel_path(fullpath)
-                        fullpath = os.path.join(dirpath, filename)
-                        self._files[relpath] = GeneratedFile(fullpath, relpath, currentdir)
+                        fullpath = os.path.join(self._build_root, relpath)
+                        generatedfile = GeneratedFile(fullpath, relpath, currentdir)
+                        self._files[relpath] = generatedfile
+                        generatedfile.set_generator_source(sourcefile)
+                elif extension in ('.l', '.y', '.pre'):
+                    fullpath = os.path.join(dirpath, filename)
+                    relpath = self._get_rel_path(fullpath)
+                    self._files[relpath] = GeneratorSourceFile(fullpath, relpath, currentdir)
 
     def _create_module(self, rootdir):
         """Create module for a subdirectory."""
@@ -480,13 +743,25 @@ class GromacsTree(object):
         rootdir.set_module(moduleobj)
         self._modules[name] = moduleobj
 
-    def scan_files(self):
+    def scan_files(self, only_files=None, keep_contents=False):
         """Read source files to initialize #include dependencies."""
-        for fileobj in self._files.itervalues():
+        if only_files:
+            filelist = only_files
+        else:
+            filelist = self._files.itervalues()
+        for fileobj in filelist:
             if not fileobj.is_external():
-                fileobj.scan_contents(self)
-
-    def load_xml(self, only_files=False):
+                fileobj.scan_contents(self, keep_contents)
+                module = fileobj.get_module()
+                if module:
+                    for includedfile in fileobj.get_includes():
+                        otherfile = includedfile.get_file()
+                        if otherfile:
+                            othermodule = otherfile.get_module()
+                            if othermodule and othermodule != module:
+                                module.add_dependency(othermodule, includedfile)
+
+    def load_xml(self, only_files=None):
         """Load Doxygen XML information.
 
         If only_files is True, XML data is not loaded for code constructs, but
@@ -495,7 +770,11 @@ class GromacsTree(object):
         xmldir = os.path.join(self._build_root, 'docs', 'html', 'doxygen', 'xml')
         self._docset = xml.DocumentationSet(xmldir, self._reporter)
         if only_files:
-            self._docset.load_file_details()
+            if isinstance(only_files, collections.Iterable):
+                filelist = [x.get_abspath() for x in only_files]
+                self._docset.load_file_details(filelist)
+            else:
+                self._docset.load_file_details()
         else:
             self._docset.load_details()
             self._docset.merge_duplicates()
@@ -503,7 +782,9 @@ class GromacsTree(object):
         self._load_modules()
         self._load_files()
         if not only_files:
+            self._load_namespaces()
             self._load_classes()
+            self._load_members()
 
     def _load_dirs(self):
         """Load Doxygen XML directory information."""
@@ -546,11 +827,15 @@ class GromacsTree(object):
         """Load Doxygen XML file information."""
         for filedoc in self._docset.get_files():
             path = filedoc.get_path()
+            if not path:
+                # In case of only partially loaded file information,
+                # the path information is not set for unloaded files.
+                continue
             if not os.path.isabs(path):
                 self._reporter.xml_assert(filedoc.get_xml_path(),
                         "expected absolute path in Doxygen-produced XML file")
                 continue
-            extension = os.path.splitext(filedoc.get_path())[1]
+            extension = os.path.splitext(path)[1]
             # We don't care about Markdown files that only produce pages
             # (and fail the directory check below).
             if extension == '.md':
@@ -568,6 +853,14 @@ class GromacsTree(object):
             fileobj.set_doc_xml(filedoc, self)
             self._docmap[filedoc] = fileobj
 
+    def _load_namespaces(self):
+        """Load Doxygen XML namespace information."""
+        nsdocs = self._docset.get_namespaces()
+        for nsdoc in nsdocs:
+            nsobj = Namespace(nsdoc)
+            self._docmap[nsdoc] = nsobj  # lets get_object() map the XML entity back
+            self._namespaces.add(nsobj)
+
     def _load_classes(self):
         """Load Doxygen XML class information."""
         classdocs = self._docset.get_classes()
@@ -577,6 +870,16 @@ class GromacsTree(object):
             self._docmap[classdoc] = classobj
             self._classes.add(classobj)
 
+    def _load_members(self):
+        """Load Doxygen XML member information (namespaces must be loaded first)."""
+        memberdocs = self._docset.get_members()
+        for memberdoc in memberdocs:
+            nsdoc = memberdoc.get_namespace()
+            nsobj = self.get_object(nsdoc)  # Namespace wrapper, or None if not mapped
+            memberobj = Member(memberdoc, nsobj)
+            self._docmap[memberdoc] = memberobj
+            self._members.add(memberobj)
+
     def _get_dir(self, relpath):
         """Get directory object for a path relative to source tree root."""
         return self._dirs.get(relpath)
@@ -587,28 +890,75 @@ class GromacsTree(object):
 
     def find_include_file(self, includedpath):
         """Find a file object corresponding to an include path."""
-        for testdir in ('src', 'src/gromacs/legacyheaders', 'src/external/thread_mpi/include'):
+        for testdir in ('src', 'src/external/thread_mpi/include',
+                'src/external/tng_io/include'):
             testpath = os.path.join(testdir, includedpath)
             if testpath in self._files:
                 return self._files[testpath]
 
-    def set_installed_file_list(self, installedfiles):
-        """Set list of installed files."""
-        for path in installedfiles:
-            if not os.path.isabs(path):
-                self._reporter.input_error(
-                        "installed file not specified with absolute path: {0}"
-                        .format(path))
-                continue
-            relpath = self._get_rel_path(path)
-            if relpath not in self._files:
-                self._reporter.input_error(
-                        "installed file not in source tree: {0}".format(path))
-                continue
-            self._files[relpath].set_installed()
+    def load_git_attributes(self):
+        """Load the git 'filter' attribute for all files (via 'git check-attr')."""
+        args = ['git', 'check-attr', '--stdin', 'filter']
+        git_check_attr = subprocess.Popen(args, stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE, cwd=self._source_root)  # paths are relative to cwd
+        filelist = '\n'.join(map(File.get_relpath, self._files.itervalues()))
+        filters = git_check_attr.communicate(filelist)[0]
+        for fileinfo in filters.splitlines():  # each line: '<path>: filter: <value>'
+            path, dummy, value = fileinfo.rsplit(': ', 2)  # path itself may hold ': '
+            fileobj = self._files.get(path)
+            assert fileobj is not None  # every reported path was sent from _files
+            fileobj.set_git_filter_attribute(value)
+
+    def load_installed_file_list(self):
+        """Mark the files listed in the build tree's installed-headers.txt as installed."""
+        listpath = os.path.join(self._build_root, 'docs', 'doxygen', 'installed-headers.txt')
+        with open(listpath, 'r') as installedfp:
+            for line in installedfp:
+                path = line.strip()  # one path per line
+                if not os.path.isabs(path):
+                    self._reporter.input_error(
+                            "installed file not specified with absolute path: {0}"
+                            .format(path))
+                    continue  # reported; keep processing the remaining lines
+                relpath = self._get_rel_path(path)  # _files is keyed by relative path
+                if relpath not in self._files:
+                    self._reporter.input_error(
+                            "installed file not in source tree: {0}".format(path))
+                    continue  # reported; keep processing the remaining lines
+                self._files[relpath].set_installed()
+
+    def load_cycle_suppression_list(self, filename):
+        """Load a list of module dependency edges to suppress in cycles.
+
+        Each line is 'first -> second' without the 'module_' prefix; blank
+        lines and '#' lines are skipped.  Edges that exist are marked in
+        their ModuleDependency objects."""
+        with open(filename, 'r') as fp:
+            for line in fp:
+                line = line.strip()
+                if not line or line.startswith('#'):
+                    continue
+                modulenames = ['module_' + x.strip() for x in line.split('->')]
+                if len(modulenames) != 2:
+                    self._reporter.input_error(
+                            "invalid cycle suppression line: {0}".format(line))
+                    continue
+                firstmodule = self._modules.get(modulenames[0])
+                secondmodule = self._modules.get(modulenames[1])
+                if not firstmodule or not secondmodule:
+                    self._reporter.input_error(
+                            "unknown modules mentioned on cycle suppression line: {0}".format(line))
+                    continue
+                for dep in firstmodule.get_dependencies():
+                    if dep.get_other_module() == secondmodule:
+                        # TODO: Check that each suppression is actually part of
+                        # a cycle.
+                        dep.set_cycle_suppression()
 
     def get_object(self, docobj):
         """Get tree object for a Doxygen XML object."""
+        if docobj is None:
+            return None
         return self._docmap.get(docobj)
 
     def get_files(self):
@@ -625,5 +975,4 @@ class GromacsTree(object):
 
     def get_members(self):
         """Get iterable for all members (in Doxygen terms) in the source tree."""
-        # TODO: Add wrappers to solve some issues.
-        return self._docset.get_members()
+        return self._members