Ignore test-only dependency cycles
[alexxy/gromacs.git] / docs / doxygen / gmxtree.py
1 #!/usr/bin/python
2 #
3 # This file is part of the GROMACS molecular simulation package.
4 #
5 # Copyright (c) 2014,2015, by the GROMACS development team, led by
6 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 # and including many others, as listed in the AUTHORS file in the
8 # top-level source directory and at http://www.gromacs.org.
9 #
10 # GROMACS is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU Lesser General Public License
12 # as published by the Free Software Foundation; either version 2.1
13 # of the License, or (at your option) any later version.
14 #
15 # GROMACS is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 # Lesser General Public License for more details.
19 #
20 # You should have received a copy of the GNU Lesser General Public
21 # License along with GROMACS; if not, see
22 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24 #
25 # If you want to redistribute modifications to GROMACS, please
26 # consider that scientific software is very special. Version
27 # control is crucial - bugs must be traceable. We will be happy to
28 # consider code for inclusion in the official distribution, but
29 # derived work must not be called official GROMACS. Details are found
30 # in the README & COPYING files - if they are missing, get the
31 # official version at http://www.gromacs.org.
32 #
33 # To help us fund GROMACS development, we humbly ask that you cite
34 # the research papers on the package. Check out http://www.gromacs.org.
35
36 """GROMACS-specific representation for source tree and documentation.
37
38 This module provides classes that construct a GROMACS-specific representation
39 of the source tree and associate the Doxygen XML output with it.  It constructs
40 an initial representation by walking the source tree in the file system, and
41 then associates information from the Doxygen XML output into this.
42 It also adds some additional knowledge from how the GROMACS source tree is
43 organized to construct a representation that is easy to process and check as
44 the top-level scripts expect.
45
46 The object model is rooted at a GromacsTree object.  Currently, it constructs a
47 representation of the source tree from the file system, but is otherwise mostly
48 a thin wrapper around the Doxygen XML tree.  It already adds some relations and
49 rules that come from GROMACS-specific knowledge.  In the future, more such
50 customizations will be added.
51 """
52
53 import collections
54 import os
55 import os.path
56 import re
57 import subprocess
58
59 import doxygenxml as xml
60 import reporter
61 # We import DocType directly so that it is exposed from this module as well.
62 from doxygenxml import DocType
63
def _get_api_type_for_compound(grouplist):
    """Deduce the API type of a compound from its Doxygen groups.

    Membership in 'group_publicapi' yields DocType.public and membership in
    'group_libraryapi' yields DocType.library.  Entries that are not
    xml.Group instances are skipped.  Without either group, the result is
    DocType.internal.  If both groups are present, the one that comes last
    in grouplist determines the result.
    """
    api_type_by_group = {
        'group_publicapi': DocType.public,
        'group_libraryapi': DocType.library,
    }
    api_type = DocType.internal
    for candidate in grouplist:
        if not isinstance(candidate, xml.Group):
            continue
        # Unknown groups leave the current value untouched.
        # TODO: Check for multiple group membership
        api_type = api_type_by_group.get(candidate.get_name(), api_type)
    return api_type
75
class IncludedFile(object):

    """Single #include directive found in a file.

    The object remembers the file that contains the directive, the line the
    directive is on, the path as written in the directive, and the object
    that the caller resolved the path to.
    """

    def __init__(self, including_file, lineno, included_file, included_path, is_relative, is_system, line):
        """Store the properties of one #include directive.

        including_file is the file containing the directive and lineno the
        line number within it.  included_file is the object the include was
        resolved to (stored as given), included_path the path as written.
        is_relative and is_system are flags describing how the include was
        written/resolved, and line is the full source line.
        """
        self._line = line
        self._line_number = lineno
        self._including_file = including_file
        self._included_file = included_file
        self._included_path = included_path
        self._is_system = is_system
        self._is_relative = is_relative

    def __str__(self):
        """Format the included path with the delimiters of the directive."""
        template = '<{0}>' if self._is_system else '"{0}"'
        return template.format(self._included_path)

    def is_system(self):
        """Return the is_system flag given at construction."""
        return self._is_system

    def is_relative(self):
        """Return the is_relative flag given at construction."""
        return self._is_relative

    def get_included_path(self):
        """Return the path as it was written in the directive."""
        return self._included_path

    def get_including_file(self):
        """Return the file that contains the directive."""
        return self._including_file

    def get_file(self):
        """Return the object that the included path was resolved to."""
        return self._included_file

    def get_line_number(self):
        """Return the line number of the directive in the including file."""
        return self._line_number

    def get_full_line(self):
        """Return the full source line on which this include appears.

        Trailing newline is included."""
        return self._line

    def get_reporter_location(self):
        """Return a reporter.Location pointing at the directive."""
        abspath = self._including_file.get_abspath()
        return reporter.Location(abspath, self._line_number)
122
class IncludeBlock(object):

    """Block of consecutive #include directives in a file.

    The block tracks the included files in the order they were added, as
    well as the line numbers of the first and the most recently added one.
    """

    def __init__(self, first_included_file):
        """Start a new block that contains first_included_file."""
        self._files = []
        start_line = first_included_file.get_line_number()
        self._first_line = start_line
        self._last_line = start_line
        self.add_file(first_included_file)

    def add_file(self, included_file):
        """Append an include to the block and extend the block to its line."""
        self._last_line = included_file.get_line_number()
        self._files.append(included_file)

    def get_includes(self):
        """Return the list of includes in the block, in insertion order."""
        return self._files

    def get_first_line(self):
        """Return the line number of the first include in the block."""
        return self._first_line

    def get_last_line(self):
        """Return the line number of the most recently added include."""
        return self._last_line
145
class File(object):

    """Source or header file in the GROMACS tree.

    A file knows its location, the directory object it belongs to, the
    #include directives found in it (after scan_contents()), and the
    Doxygen documentation entity associated with it (after set_doc_xml()).
    """

    def __init__(self, abspath, relpath, directory):
        """Initialize the file from its paths and add it to its directory.

        Files with extension .c, .cc, .cpp, or .cu are treated as source
        files; everything else is treated as a header.
        """
        self._abspath = abspath
        self._relpath = relpath
        self._dir = directory
        suffix = os.path.splitext(abspath)[1]
        self._sourcefile = (suffix in ('.c', '.cc', '.cpp', '.cu'))
        # Documentation-related state, filled in by set_doc_xml().
        self._rawdoc = None
        self._apitype = DocType.none
        self._modules = set()
        # State set through the various set_*() methods.
        self._installed = False
        self._filter = None
        self._main_header = None
        # State populated by scan_contents() and friends.
        self._includes = []
        self._include_blocks = []
        self._lines = None
        self._used_config_h_defines = set()
        directory.add_file(self)

    def set_doc_xml(self, rawdoc, sourcetree):
        """Associate Doxygen documentation entity with the file.

        For a documented file, the API type is deduced from the Doxygen
        groups of the file, and every group that sourcetree.get_object()
        maps to an object is recorded as a module of the file.
        """
        assert self._rawdoc is None
        assert rawdoc.is_source_file() == self._sourcefile
        self._rawdoc = rawdoc
        if not rawdoc.is_documented():
            return
        grouplist = rawdoc.get_groups()
        self._apitype = _get_api_type_for_compound(grouplist)
        for group in grouplist:
            moduleobj = sourcetree.get_object(group)
            if moduleobj:
                self._modules.add(moduleobj)

    def set_installed(self):
        """Mark the file installed."""
        self._installed = True

    def set_git_filter_attribute(self, filtername):
        """Set the git filter attribute associated with the file."""
        self._filter = filtername

    def set_main_header(self, included_file):
        """Set the main header file for a source file."""
        assert self.is_source_file()
        self._main_header = included_file

    def _process_include(self, lineno, is_system, includedpath, line, sourcetree):
        """Process #include directive during scan().

        A quoted include that exists relative to the directory of this file
        is resolved through sourcetree.get_file() and marked relative.  All
        other includes are resolved with sourcetree.find_include_file().
        The created IncludedFile is recorded and returned.
        """
        is_relative = False
        if not is_system:
            candidate = os.path.join(self._dir.get_abspath(), includedpath)
            candidate = os.path.abspath(candidate)
            is_relative = os.path.exists(candidate)
        if is_relative:
            target = sourcetree.get_file(candidate)
        else:
            target = sourcetree.find_include_file(includedpath)
        include = IncludedFile(self, lineno, target, includedpath,
                               is_relative, is_system, line)
        self._includes.append(include)
        return include

    def scan_contents(self, sourcetree, keep_contents):
        """Scan the file contents and initialize information based on it.

        Every #include directive is processed, and consecutive directives
        are grouped into IncludeBlock objects.  Lines that are empty or
        contain only whitespace do not end a block; any other line does.
        If keep_contents is set, the lines of the file are stored and are
        available through get_contents().
        """
        # TODO: Consider a more robust regex.
        include_re = r'^\s*#\s*include\s+(?P<quote>["<])(?P<path>[^">]*)[">]'
        with open(self._abspath, 'r') as scanfile:
            lines = scanfile.read().splitlines(True)
        open_block = None
        for lineno, line in enumerate(lines, 1):
            match = re.match(include_re, line)
            if not match:
                if line and not line.isspace():
                    # Real content terminates the current include block.
                    open_block = None
                continue
            uses_angle_brackets = (match.group('quote') == '<')
            include = self._process_include(lineno, uses_angle_brackets,
                                            match.group('path'), line,
                                            sourcetree)
            if open_block is not None:
                open_block.add_file(include)
            else:
                open_block = IncludeBlock(include)
                self._include_blocks.append(open_block)
        if keep_contents:
            self._lines = lines

    def add_used_config_h_defines(self, defines):
        """Set config.h defines used in this file.

        Used internally by find_config_h_uses()."""
        self._used_config_h_defines.update(defines)

    def get_reporter_location(self):
        """Return a reporter.Location for the file (without a line number)."""
        return reporter.Location(self._abspath, None)

    def is_installed(self):
        """Return whether set_installed() has been called for the file."""
        return self._installed

    def is_external(self):
        """Return whether the directory of the file is external."""
        return self._dir.is_external()

    def is_source_file(self):
        """Return whether the file has a source file extension."""
        return self._sourcefile

    def is_test_file(self):
        """Return whether the file is located in a test directory."""
        return self._dir.is_test_directory()

    def should_includes_be_sorted(self):
        """Return whether the include directives in the file should be sorted."""
        sorting_filters = ('includesort', 'uncrustify')
        return self._filter in sorting_filters

    def is_documented(self):
        """Return a true value if the file has Doxygen documentation.

        Without an associated documentation entity, the (false) entity
        value itself is returned."""
        rawdoc = self._rawdoc
        return rawdoc and rawdoc.is_documented()

    def has_brief_description(self):
        """Return a true value if the documentation has a brief description.

        Without an associated documentation entity, the (false) entity
        value itself is returned."""
        rawdoc = self._rawdoc
        return rawdoc and rawdoc.has_brief_description()

    def get_abspath(self):
        """Return the absolute path of the file."""
        return self._abspath

    def get_relpath(self):
        """Return the relative path given at construction."""
        return self._relpath

    def get_name(self):
        """Return the file name without any directory components."""
        return os.path.basename(self._abspath)

    def get_directory(self):
        """Return the directory object that contains the file."""
        return self._dir

    def get_doc_type(self):
        """Return the documentation visibility, or DocType.none if there is
        no associated documentation entity."""
        if self._rawdoc:
            return self._rawdoc.get_visibility()
        return DocType.none

    def get_api_type(self):
        """Return the API type deduced in set_doc_xml()."""
        return self._apitype

    def api_type_is_reliable(self):
        """Return whether the API type can be trusted.

        Internal and library API types are always considered reliable.
        Otherwise, the result is true only if the file has a module and
        that module is documented."""
        if self._apitype in (DocType.internal, DocType.library):
            return True
        moduleobj = self.get_module()
        return moduleobj and moduleobj.is_documented()

    def is_public(self):
        """Return whether the file is part of the public API.

        If the API type is not reliable, installed files are also treated
        as public."""
        reliable = self.api_type_is_reliable()
        declared_public = (self.get_api_type() == DocType.public)
        if reliable or declared_public:
            return declared_public
        return self.is_installed()

    def is_module_internal(self):
        """Return whether the file is internal to its module.

        Source files are always internal.  Headers are internal if they are
        not installed and their API type is at most DocType.internal."""
        if self.is_source_file():
            return True
        if self.is_installed():
            return False
        return self.get_api_type() <= DocType.internal

    def get_expected_module(self):
        """Return the module determined by the location of the file."""
        return self._dir.get_module()

    def get_doc_modules(self):
        """Return the set of modules recorded from the documentation."""
        return self._modules

    def get_module(self):
        """Return the module of the file.

        The module from the directory is preferred; if there is none and
        the documentation names exactly one module, that one is used."""
        moduleobj = self.get_expected_module()
        if moduleobj:
            return moduleobj
        if len(self._modules) == 1:
            return next(iter(self._modules))
        return moduleobj

    def get_includes(self):
        """Return the list of IncludedFile objects found by scanning."""
        return self._includes

    def get_include_blocks(self):
        """Return the list of IncludeBlock objects found by scanning."""
        return self._include_blocks

    def get_main_header(self):
        """Return the main header set with set_main_header(), if any."""
        return self._main_header

    def get_contents(self):
        """Return the stored lines of the file, or None if not kept."""
        return self._lines

    def get_used_config_h_defines(self):
        """Return set of defines from config.h that are used in this file.

        The return value is empty if find_config_h_uses() has not been called,
        as well as for headers that declare these defines."""
        return self._used_config_h_defines
335
class GeneratedFile(File):

    """File that is produced from a generator source file.

    The file itself may not exist when the tree is scanned; in that case,
    scanning is silently skipped.
    """

    def __init__(self, abspath, relpath, directory):
        """Initialize the file without an associated generator source."""
        File.__init__(self, abspath, relpath, directory)
        self._generator_source_file = None

    def scan_contents(self, sourcetree, keep_contents):
        """Scan the file contents, but only if the file exists on disk."""
        if not os.path.exists(self.get_abspath()):
            return
        File.scan_contents(self, sourcetree, keep_contents)

    def set_generator_source(self, sourcefile):
        """Set the file from which this file is generated."""
        self._generator_source_file = sourcefile

    def get_reporter_location(self):
        """Return the location to use when reporting issues for the file.

        The location of the generator source is used when one has been set,
        and the location of the generated file itself otherwise."""
        generator = self._generator_source_file
        if not generator:
            return File.get_reporter_location(self)
        return generator.get_reporter_location()
352
class GeneratorSourceFile(File):

    """Input file from which another file is generated.

    For the file named config.h.cmakein, scanning additionally collects the
    names of the defines that appear in the file.
    """

    def __init__(self, abspath, relpath, directory):
        """Initialize the file with no define information."""
        File.__init__(self, abspath, relpath, directory)
        self._defines = None

    def scan_contents(self, sourcetree, keep_contents):
        """Scan the file and, for config.h.cmakein, extract the defines.

        The contents of config.h.cmakein are always kept, since they are
        needed for extracting the defines."""
        is_config_h = (self.get_name() == 'config.h.cmakein')
        File.scan_contents(self, sourcetree, keep_contents or is_config_h)
        if not is_config_h:
            return
        # Matches lines that start with '#' and contain 'define' followed by
        # a name (this covers both '#define' and '#cmakedefine').
        define_re = r'^#.*define\s+(\w*)'
        matches = [re.match(define_re, line) for line in self.get_contents()]
        self._defines = [found.group(1) for found in matches if found]

    def get_defines(self):
        """Return list of possible defines from config.h.cmakein.

        The information is only populated for config.h.cmakein (after
        scan_contents() has been called); for other files, None is
        returned."""
        return self._defines
374
class Directory(object):

    """(Sub)directory in the GROMACS tree.

    Directories form a tree through their parent links.  The "test" and
    "external" properties are inherited from the parent directory, and the
    module of a directory defaults to the module of its parent.
    """

    def __init__(self, abspath, relpath, parent):
        """Initialize a directory representation with basic information.

        If parent is given, the new directory is registered as one of its
        subdirectories.  A directory is a test directory if it is named
        'tests' or 'legacytests' or if its parent is a test directory, and
        it is external if it is named 'external' or its parent is external.
        """
        self._abspath = abspath
        self._relpath = relpath
        self._parent = parent
        self._name = os.path.basename(abspath)
        self._rawdoc = None
        self._module = None
        self._is_test_dir = (
            bool(parent and parent.is_test_directory())
            or self._name in ('tests', 'legacytests'))
        self._is_external = (
            bool(parent and parent.is_external())
            or self._name == 'external')
        self._subdirs = set()
        self._files = set()
        # Cached result for has_installed_files(); None means "not computed".
        self._has_installed_files = None
        if parent:
            parent._subdirs.add(self)

    def set_doc_xml(self, rawdoc, sourcetree):
        """Associate Doxygen documentation entity with the directory."""
        assert self._rawdoc is None
        assert self._abspath == rawdoc.get_path().rstrip('/')
        self._rawdoc = rawdoc

    def set_module(self, module):
        """Set the module for the directory; can only be done once."""
        assert self._module is None
        self._module = module

    def add_file(self, fileobj):
        """Register a file as residing directly in this directory."""
        self._files.add(fileobj)

    def get_name(self):
        """Return the last component of the directory path."""
        return self._name

    def get_reporter_location(self):
        """Return a reporter.Location for the directory."""
        return reporter.Location(self._abspath, None)

    def get_abspath(self):
        """Return the absolute path of the directory."""
        return self._abspath

    def get_relpath(self):
        """Return the relative path given at construction."""
        return self._relpath

    def is_test_directory(self):
        """Return whether this is a test directory (or within one)."""
        return self._is_test_dir

    def is_external(self):
        """Return whether this is an external directory (or within one)."""
        return self._is_external

    def has_installed_files(self):
        """Return whether the directory tree contains installed files.

        Subdirectories are checked recursively.  The result is computed on
        the first call and cached for subsequent calls."""
        if self._has_installed_files is None:
            self._has_installed_files = (
                any(subdir.has_installed_files() for subdir in self._subdirs)
                or any(fileobj.is_installed() for fileobj in self._files))
        return self._has_installed_files

    def get_module(self):
        """Return the module of this directory or of the closest parent that
        has one, or None if there is no such module."""
        if self._module:
            return self._module
        if not self._parent:
            return None
        return self._parent.get_module()

    def get_subdirectories(self):
        """Return the set of immediate subdirectories."""
        return self._subdirs

    def get_files(self):
        """Iterate over all files in this directory and its subdirectories.

        Files in subdirectories are produced before the files that reside
        directly in this directory."""
        for subdir in self._subdirs:
            for nested in subdir.get_files():
                yield nested
        for own in self._files:
            yield own

    def contains(self, fileobj):
        """Check whether file is within the directory or its subdirectories."""
        current = fileobj.get_directory()
        # Walk up the parent chain until this directory or the root is hit.
        while current:
            if current == self:
                return True
            current = current._parent
        return False
469
class ModuleDependency(object):

    """Dependency between modules.

    The dependency collects the individual #include directives that cause
    it, and tracks whether all of them come from test code.
    """

    def __init__(self, othermodule):
        """Initialize empty dependency object with given module as dependency."""
        self._othermodule = othermodule
        self._includedfiles = []
        # Stays None until set_cycle_suppression() is called.
        self._cyclesuppression = None
        # Remains True until an include from non-test code is added.
        self._is_test_only_dependency = True

    def add_included_file(self, includedfile):
        """Add IncludedFile that is part of this dependency."""
        assert includedfile.get_file().get_module() == self._othermodule
        from_test_code = includedfile.get_including_file().is_test_file()
        if not from_test_code:
            self._is_test_only_dependency = False
        self._includedfiles.append(includedfile)

    def set_cycle_suppression(self):
        """Set suppression on cycles containing this dependency."""
        self._cyclesuppression = True

    def is_cycle_suppressed(self):
        """Return whether cycles containing this dependency are suppressed."""
        return self._cyclesuppression is not None

    def is_test_only_dependency(self):
        """Return whether this dependency is only from test code."""
        return self._is_test_only_dependency

    def get_other_module(self):
        """Get module that this dependency is to."""
        return self._othermodule

    def get_included_files(self):
        """Get IncludedFile objects for the individual include dependencies."""
        return self._includedfiles
507
class Module(object):

    """Code module in the GROMACS source tree.

    Modules are specific subdirectories that host a more or less coherent
    set of routines.  Simplified, every subdirectory under src/gromacs/ is
    a different module.  This object provides that abstraction and also links
    the subdirectory to the module documentation (documented as a group in
    Doxygen) if that exists.
    """

    def __init__(self, name, rootdir):
        """Initialize a module with the given name and root directory."""
        self._name = name
        self._rawdoc = None
        self._rootdir = rootdir
        self._group = None
        # Maps the module depended on to the ModuleDependency describing it.
        self._dependencies = dict()

    def set_doc_xml(self, rawdoc, sourcetree):
        """Associate Doxygen documentation entity with the module.

        If the documented entity belongs to exactly one Doxygen group whose
        name starts with 'group_', the rest of that name is stored as the
        group of the module (see get_group()).
        """
        assert self._rawdoc is None
        self._rawdoc = rawdoc
        if not rawdoc.is_documented():
            return
        groups = list(rawdoc.get_groups())
        if len(groups) != 1:
            return
        prefix = 'group_'
        groupname = groups[0].get_name()
        if groupname.startswith(prefix):
            self._group = groupname[len(prefix):]

    def add_dependency(self, othermodule, includedfile):
        """Add #include dependency from a file in this module."""
        assert includedfile.get_file().get_module() == othermodule
        if othermodule not in self._dependencies:
            self._dependencies[othermodule] = ModuleDependency(othermodule)
        self._dependencies[othermodule].add_included_file(includedfile)

    def is_documented(self):
        """Return whether a documentation entity has been associated."""
        return self._rawdoc is not None

    def get_name(self):
        """Return the name of the module."""
        return self._name

    def get_root_dir(self):
        """Return the root directory object of the module."""
        return self._rootdir

    def get_files(self):
        """Return an iterable over the files under the module root."""
        # TODO: Include public API convenience headers?
        return self._rootdir.get_files()

    def get_group(self):
        """Return the group deduced in set_doc_xml(), or None."""
        return self._group

    def get_dependencies(self):
        """Return an iterator over the ModuleDependency objects.

        dict.itervalues() only exists in Python 2; iter() over values()
        gives callers the same iterator interface on Python 2 and 3.
        """
        return iter(self._dependencies.values())
562
class Namespace(object):

    """Namespace in the GROMACS source code.

    Thin wrapper around the Doxygen documentation entity of the namespace.
    """

    def __init__(self, rawdoc):
        """Wrap the given Doxygen documentation entity."""
        self._rawdoc = rawdoc

    def is_anonymous(self):
        """Return whether the documentation entity reports the namespace
        as anonymous."""
        rawdoc = self._rawdoc
        return rawdoc.is_anonymous()
572
class Class(object):

    """Class/struct/union in the GROMACS source code.

    Wraps the Doxygen documentation entity of the class together with the
    set of files associated with the class.
    """

    def __init__(self, rawdoc, files):
        """Wrap the documentation entity and store the files as a set."""
        self._rawdoc = rawdoc
        self._files = set(files)

    def get_name(self):
        """Return the name from the documentation entity."""
        return self._rawdoc.get_name()

    def get_reporter_location(self):
        """Return the reporter location from the documentation entity."""
        return self._rawdoc.get_reporter_location()

    def get_files(self):
        """Return the set of files associated with the class."""
        return self._files

    def is_documented(self):
        """Return whether the documentation entity is documented."""
        return self._rawdoc.is_documented()

    def has_brief_description(self):
        """Return whether the documentation has a brief description."""
        return self._rawdoc.has_brief_description()

    def get_doc_type(self):
        """Return documentation type (visibility) for the class.

        In addition to the actual code, this encodes GROMACS-specific logic
        of setting EXTRACT_LOCAL_CLASSES=YES only for the full documentation.
        Local classes never appear outside the full documentation, no matter
        what is their visibility.
        """
        if not self.is_documented():
            return DocType.none
        rawdoc = self._rawdoc
        if rawdoc.is_local():
            return DocType.internal
        return rawdoc.get_visibility()

    def get_file_doc_type(self):
        """Return the highest documentation type among the files."""
        return max(fileobj.get_doc_type() for fileobj in self._files)

    def is_in_installed_file(self):
        """Return whether at least one of the files is installed."""
        return any(fileobj.is_installed() for fileobj in self._files)
615
class Member(object):

    """Member (in Doxygen terminology) in the GROMACS source tree.

    Currently, modeling is limited to the minimal set of properties that the
    checker uses.
    """

    def __init__(self, rawdoc, namespace):
        """Wrap the documentation entity and remember the namespace of the
        member (which may be None)."""
        self._namespace = namespace
        self._rawdoc = rawdoc

    def get_name(self):
        """Return the name from the documentation entity."""
        return self._rawdoc.get_name()

    def get_reporter_location(self):
        """Return the reporter location from the documentation entity."""
        return self._rawdoc.get_reporter_location()

    def is_documented(self):
        """Return whether the documentation entity is documented."""
        return self._rawdoc.is_documented()

    def has_brief_description(self):
        """Return whether the documentation has a brief description."""
        return self._rawdoc.has_brief_description()

    def has_inbody_description(self):
        """Return whether the documentation has an in-body description."""
        return self._rawdoc.has_inbody_description()

    def is_visible(self):
        """Return whether the member is visible in Doxygen documentation.

        Doxygen ignores members whose parent compounds are not documented.
        However, when EXTRACT_ANON_NSPACES=ON (which is set for our full
        documentation), members of anonymous namespaces are extracted even if
        the namespace is the only parent and is not documented.
        """
        namespace = self._namespace
        in_anonymous_namespace = namespace and namespace.is_anonymous()
        if in_anonymous_namespace:
            return True
        return self._rawdoc.get_inherited_visibility() != DocType.none
654
655
656 class GromacsTree(object):
657
658     """Root object for navigating the GROMACS source tree.
659
660     On initialization, the list of files and directories is initialized by
661     walking the source tree, and modules are created for top-level
662     subdirectories.  At this point, only information that is accessible from
663     file names and paths only is available.
664
665     load_git_attributes() can be called to load attribute information from
666     .gitattributes for all the files.
667
668     load_installed_file_list() can be called to load the list of installed
669     files from the build tree (generated by CMake).
670
671     scan_files() can be called to read all the files and initialize #include
672     dependencies between the files based on the information.  This is done like
673     this instead of relying on Doxygen-extracted include files to make the
674     dependency graph independent from preprocessor macro definitions
675     (Doxygen only sees those #includes that the preprocessor sees, which
676     depends on what #defines it has seen).
677
678     find_config_h_uses() can be called to find all uses of defines declared in
679     config.h.  In the current implementation, scan_files() must have been
680     called earlier.
681
682     load_xml() can be called to load information from Doxygen XML data in
683     the build tree (the Doxygen XML data must have been built separately).
684     """
685
686     def __init__(self, source_root, build_root, reporter):
687         """Initialize the tree object by walking the source tree."""
688         self._source_root = os.path.abspath(source_root)
689         self._build_root = os.path.abspath(build_root)
690         self._reporter = reporter
691         self._docset = None
692         self._docmap = dict()
693         self._dirs = dict()
694         self._files = dict()
695         self._modules = dict()
696         self._classes = set()
697         self._namespaces = set()
698         self._members = set()
699         self._walk_dir(os.path.join(self._source_root, 'src'))
700         for fileobj in self.get_files():
701             if fileobj and fileobj.is_source_file() and not fileobj.is_external():
702                 (basedir, name) = os.path.split(fileobj.get_abspath())
703                 (basename, ext) = os.path.splitext(name)
704                 header = self.get_file(os.path.join(basedir, basename + '.h'))
705                 if not header and ext == '.cu':
706                     header = self.get_file(os.path.join(basedir, basename + '.cuh'))
707                 if not header and fileobj.is_test_file():
708                     basedir = os.path.dirname(basedir)
709                     header = self.get_file(os.path.join(basedir, basename + '.h'))
710                     if not header:
711                         # Somewhat of a hack; currently, the tests for
712                         # analysisdata/modules/ and trajectoryanalysis/modules/
713                         # is at the top-level tests directory.
714                         # TODO: It could be clearer to split the tests so that
715                         # there would be a separate modules/tests/.
716                         header = self.get_file(os.path.join(basedir, 'modules', basename + '.h'))
717                     if not header and basename.endswith('_tests'):
718                         header = self.get_file(os.path.join(basedir, basename[:-6] + '.h'))
719                 if not header and fileobj.get_relpath().startswith('src/gromacs'):
720                     header = self._files.get(os.path.join('src/gromacs/legacyheaders', basename + '.h'))
721                 if header:
722                     fileobj.set_main_header(header)
723         rootdir = self._get_dir(os.path.join('src', 'gromacs'))
724         for subdir in rootdir.get_subdirectories():
725             self._create_module(subdir)
726         rootdir = self._get_dir(os.path.join('src', 'testutils'))
727         self._create_module(rootdir)
728
729     def _get_rel_path(self, path):
730         assert os.path.isabs(path)
731         if path.startswith(self._build_root):
732             return os.path.relpath(path, self._build_root)
733         if path.startswith(self._source_root):
734             return os.path.relpath(path, self._source_root)
735         raise ValueError("path not under build nor source tree: {0}".format(path))
736
737     def _walk_dir(self, rootpath):
738         """Construct representation of the source tree by walking the file system."""
739         assert os.path.isabs(rootpath)
740         assert rootpath not in self._dirs
741         relpath = self._get_rel_path(rootpath)
742         self._dirs[relpath] = Directory(rootpath, relpath, None)
743         for dirpath, dirnames, filenames in os.walk(rootpath):
744             if 'contrib' in dirnames:
745                 dirnames.remove('contrib')
746             if 'refdata' in dirnames:
747                 dirnames.remove('refdata')
748             currentdir = self._dirs[self._get_rel_path(dirpath)]
749             # Loop through a copy so that we can modify dirnames.
750             for dirname in list(dirnames):
751                 fullpath = os.path.join(dirpath, dirname)
752                 if fullpath == self._build_root:
753                     dirnames.remove(dirname)
754                     continue
755                 relpath = self._get_rel_path(fullpath)
756                 self._dirs[relpath] = Directory(fullpath, relpath, currentdir)
757             extensions = ('.h', '.cuh', '.hpp', '.c', '.cc', '.cpp', '.cu', '.bm')
758             for filename in filenames:
759                 basename, extension = os.path.splitext(filename)
760                 if extension in extensions:
761                     fullpath = os.path.join(dirpath, filename)
762                     relpath = self._get_rel_path(fullpath)
763                     self._files[relpath] = File(fullpath, relpath, currentdir)
764                 elif extension == '.cmakein':
765                     extension = os.path.splitext(basename)[1]
766                     if extension in extensions:
767                         fullpath = os.path.join(dirpath, filename)
768                         relpath = self._get_rel_path(fullpath)
769                         sourcefile = GeneratorSourceFile(fullpath, relpath, currentdir)
770                         self._files[relpath] = sourcefile
771                         fullpath = os.path.join(dirpath, basename)
772                         relpath = self._get_rel_path(fullpath)
773                         fullpath = os.path.join(self._build_root, relpath)
774                         generatedfile = GeneratedFile(fullpath, relpath, currentdir)
775                         self._files[relpath] = generatedfile
776                         generatedfile.set_generator_source(sourcefile)
777                 elif extension in ('.l', '.y', '.pre'):
778                     fullpath = os.path.join(dirpath, filename)
779                     relpath = self._get_rel_path(fullpath)
780                     self._files[relpath] = GeneratorSourceFile(fullpath, relpath, currentdir)
781
782     def _create_module(self, rootdir):
783         """Create module for a subdirectory."""
784         name = 'module_' + rootdir.get_name()
785         moduleobj = Module(name, rootdir)
786         rootdir.set_module(moduleobj)
787         self._modules[name] = moduleobj
788
789     def scan_files(self, only_files=None, keep_contents=False):
790         """Read source files to initialize #include dependencies."""
791         if only_files:
792             filelist = only_files
793         else:
794             filelist = self._files.itervalues()
795         for fileobj in filelist:
796             if not fileobj.is_external():
797                 fileobj.scan_contents(self, keep_contents)
798                 module = fileobj.get_module()
799                 if module:
800                     for includedfile in fileobj.get_includes():
801                         otherfile = includedfile.get_file()
802                         if otherfile:
803                             othermodule = otherfile.get_module()
804                             if othermodule and othermodule != module:
805                                 module.add_dependency(othermodule, includedfile)
806
807     def load_xml(self, only_files=None):
808         """Load Doxygen XML information.
809
810         If only_files is True, XML data is not loaded for code constructs, but
811         only for files, directories, and their potential parents.
812         """
813         xmldir = os.path.join(self._build_root, 'docs', 'html', 'doxygen', 'xml')
814         self._docset = xml.DocumentationSet(xmldir, self._reporter)
815         if only_files:
816             if isinstance(only_files, collections.Iterable):
817                 filelist = [x.get_abspath() for x in only_files]
818                 self._docset.load_file_details(filelist)
819             else:
820                 self._docset.load_file_details()
821         else:
822             self._docset.load_details()
823             self._docset.merge_duplicates()
824         self._load_dirs()
825         self._load_modules()
826         self._load_files()
827         if not only_files:
828             self._load_namespaces()
829             self._load_classes()
830             self._load_members()
831
832     def _load_dirs(self):
833         """Load Doxygen XML directory information."""
834         rootdirs = self._docset.get_compounds(xml.Directory,
835                 lambda x: x.get_parent() is None)
836         for dirdoc in rootdirs:
837             self._load_dir(dirdoc, None)
838
839     def _load_dir(self, dirdoc, parent):
840         """Load Doxygen XML directory information for a single directory."""
841         path = dirdoc.get_path().rstrip('/')
842         if not os.path.isabs(path):
843             self._reporter.xml_assert(dirdoc.get_xml_path(),
844                     "expected absolute path in Doxygen-produced XML file")
845             return
846         relpath = self._get_rel_path(path)
847         dirobj = self._dirs.get(relpath)
848         if not dirobj:
849             dirobj = Directory(path, relpath, parent)
850             self._dirs[relpath] = dirobj
851         dirobj.set_doc_xml(dirdoc, self)
852         self._docmap[dirdoc] = dirobj
853         for subdirdoc in dirdoc.get_subdirectories():
854             self._load_dir(subdirdoc, dirobj)
855
856     def _load_modules(self):
857         """Load Doxygen XML module (group) information."""
858         moduledocs = self._docset.get_compounds(xml.Group,
859                 lambda x: x.get_name().startswith('module_'))
860         for moduledoc in moduledocs:
861             moduleobj = self._modules.get(moduledoc.get_name())
862             if not moduleobj:
863                 self._reporter.input_error(
864                         "no matching directory for module: {0}".format(moduledoc))
865                 continue
866             moduleobj.set_doc_xml(moduledoc, self)
867             self._docmap[moduledoc] = moduleobj
868
869     def _load_files(self):
870         """Load Doxygen XML file information."""
871         for filedoc in self._docset.get_files():
872             path = filedoc.get_path()
873             if not path:
874                 # In case of only partially loaded file information,
875                 # the path information is not set for unloaded files.
876                 continue
877             if not os.path.isabs(path):
878                 self._reporter.xml_assert(filedoc.get_xml_path(),
879                         "expected absolute path in Doxygen-produced XML file")
880                 continue
881             extension = os.path.splitext(path)[1]
882             # We don't care about Markdown files that only produce pages
883             # (and fail the directory check below).
884             if extension == '.md':
885                 continue
886             dirdoc = filedoc.get_directory()
887             if not dirdoc:
888                 self._reporter.xml_assert(filedoc.get_xml_path(),
889                         "file is not in any directory in Doxygen")
890                 continue
891             relpath = self._get_rel_path(path)
892             fileobj = self._files.get(relpath)
893             if not fileobj:
894                 fileobj = File(path, relpath, self._docmap[dirdoc])
895                 self._files[relpath] = fileobj
896             fileobj.set_doc_xml(filedoc, self)
897             self._docmap[filedoc] = fileobj
898
899     def _load_namespaces(self):
900         """Load Doxygen XML namespace information."""
901         nsdocs = self._docset.get_namespaces()
902         for nsdoc in nsdocs:
903             nsobj = Namespace(nsdoc)
904             self._docmap[nsdoc] = nsobj
905             self._namespaces.add(nsobj)
906
907     def _load_classes(self):
908         """Load Doxygen XML class information."""
909         classdocs = self._docset.get_classes()
910         for classdoc in classdocs:
911             files = [self._docmap[filedoc] for filedoc in classdoc.get_files()]
912             classobj = Class(classdoc, files)
913             self._docmap[classdoc] = classobj
914             self._classes.add(classobj)
915
916     def _load_members(self):
917         """Load Doxygen XML member information."""
918         memberdocs = self._docset.get_members()
919         for memberdoc in memberdocs:
920             nsdoc = memberdoc.get_namespace()
921             nsobj = self.get_object(nsdoc)
922             memberobj = Member(memberdoc, nsobj)
923             self._docmap[memberdoc] = memberobj
924             self._members.add(memberobj)
925
926     def _get_dir(self, relpath):
927         """Get directory object for a path relative to source tree root."""
928         return self._dirs.get(relpath)
929
930     def get_file(self, path):
931         """Get file object for a path relative to source tree root."""
932         return self._files.get(self._get_rel_path(path))
933
934     def find_include_file(self, includedpath):
935         """Find a file object corresponding to an include path."""
936         for testdir in ('src', 'src/external/thread_mpi/include',
937                 'src/external/tng_io/include'):
938             testpath = os.path.join(testdir, includedpath)
939             if testpath in self._files:
940                 return self._files[testpath]
941
942     def load_git_attributes(self):
943         """Load git attribute information for files."""
944         args = ['git', 'check-attr', '--stdin', 'filter']
945         git_check_attr = subprocess.Popen(args, stdin=subprocess.PIPE,
946                 stdout=subprocess.PIPE, cwd=self._source_root)
947         filelist = '\n'.join(map(File.get_relpath, self._files.itervalues()))
948         filters = git_check_attr.communicate(filelist)[0]
949         for fileinfo in filters.splitlines():
950             path, dummy, value = fileinfo.split(': ')
951             fileobj = self._files.get(path)
952             assert fileobj is not None
953             fileobj.set_git_filter_attribute(value)
954
955     def find_config_h_uses(self):
956         """Find files that use defines from config.h."""
957         # Executing git grep is substantially faster than using the define_re
958         # directly on the contents of the file in Python.
959         args = ['git', 'grep', '-zwIF']
960         configfile = self._files['src/config.h.cmakein']
961         for define in configfile.get_defines():
962             args.extend(['-e', define])
963         args.extend(['--', '*.cpp', '*.c', '*.cu', '*.h', '*.cuh'])
964         define_re = r'\b(?:' + '|'.join(configfile.get_defines())+ r')\b'
965         output = subprocess.check_output(args, cwd=self._source_root)
966         for line in output.splitlines():
967             (filename, text) = line.split('\0')
968             fileobj = self._files.get(filename)
969             if fileobj is not None:
970                 if fileobj.get_name() not in ('config.h', 'config.h.cmakein',
971                         'gmxpre-config.h', 'gmxpre-config.h.cmakein'):
972                     defines = re.findall(define_re, text)
973                     fileobj.add_used_config_h_defines(defines)
974
975     def load_installed_file_list(self):
976         """Load list of installed files from the build tree."""
977         listpath = os.path.join(self._build_root, 'src', 'gromacs', 'installed-headers.txt')
978         with open(listpath, 'r') as installedfp:
979             for line in installedfp:
980                 path = line.strip()
981                 if not os.path.isabs(path):
982                     self._reporter.input_error(
983                             "installed file not specified with absolute path: {0}"
984                             .format(path))
985                     continue
986                 relpath = self._get_rel_path(path)
987                 if relpath not in self._files:
988                     self._reporter.input_error(
989                             "installed file not in source tree: {0}".format(path))
990                     continue
991                 self._files[relpath].set_installed()
992
993     def load_cycle_suppression_list(self, filename):
994         """Load a list of edges to suppress in cycles.
995
996         These edges between modules, if present, will be marked in the
997         corresponding ModuleDependency objects.
998         """
999         with open(filename, 'r') as fp:
1000             for line in fp:
1001                 line = line.strip()
1002                 if not line or line.startswith('#'):
1003                     continue
1004                 modulenames = ['module_' + x.strip() for x in line.split('->')]
1005                 if len(modulenames) != 2:
1006                     self._reporter.input_error(
1007                             "invalid cycle suppression line: {0}".format(line))
1008                     continue
1009                 firstmodule = self._modules.get(modulenames[0])
1010                 secondmodule = self._modules.get(modulenames[1])
1011                 if not firstmodule or not secondmodule:
1012                     self._reporter.input_error(
1013                             "unknown modules mentioned on cycle suppression line: {0}".format(line))
1014                     continue
1015                 for dep in firstmodule.get_dependencies():
1016                     if dep.get_other_module() == secondmodule:
1017                         # TODO: Check that each suppression is actually part of
1018                         # a cycle.
1019                         dep.set_cycle_suppression()
1020
1021     def get_object(self, docobj):
1022         """Get tree object for a Doxygen XML object."""
1023         if docobj is None:
1024             return None
1025         return self._docmap.get(docobj)
1026
1027     def get_files(self):
1028         """Get iterable for all files in the source tree."""
1029         return self._files.itervalues()
1030
1031     def get_modules(self):
1032         """Get iterable for all modules in the source tree."""
1033         return self._modules.itervalues()
1034
1035     def get_classes(self):
1036         """Get iterable for all classes in the source tree."""
1037         return self._classes
1038
1039     def get_members(self):
1040         """Get iterable for all members (in Doxygen terms) in the source tree."""
1041         return self._members