Split lines with many copyright years
[alexxy/gromacs.git] / docs / doxygen / gmxtree.py
1 #!/usr/bin/env python3
2 #
3 # This file is part of the GROMACS molecular simulation package.
4 #
5 # Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
6 # Copyright (c) 2019,2020, by the GROMACS development team, led by
7 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 # and including many others, as listed in the AUTHORS file in the
9 # top-level source directory and at http://www.gromacs.org.
10 #
11 # GROMACS is free software; you can redistribute it and/or
12 # modify it under the terms of the GNU Lesser General Public License
13 # as published by the Free Software Foundation; either version 2.1
14 # of the License, or (at your option) any later version.
15 #
16 # GROMACS is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 # Lesser General Public License for more details.
20 #
21 # You should have received a copy of the GNU Lesser General Public
22 # License along with GROMACS; if not, see
23 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25 #
26 # If you want to redistribute modifications to GROMACS, please
27 # consider that scientific software is very special. Version
28 # control is crucial - bugs must be traceable. We will be happy to
29 # consider code for inclusion in the official distribution, but
30 # derived work must not be called official GROMACS. Details are found
31 # in the README & COPYING files - if they are missing, get the
32 # official version at http://www.gromacs.org.
33 #
34 # To help us fund GROMACS development, we humbly ask that you cite
35 # the research papers on the package. Check out http://www.gromacs.org.
36
37 """GROMACS-specific representation for source tree and documentation.
38
39 This module provides classes that construct a GROMACS-specific representation
40 of the source tree and associate the Doxygen XML output with it.  It constructs
41 an initial representation by walking the source tree in the file system, and
42 then associates information from the Doxygen XML output into this.
43 It also adds some additional knowledge from how the GROMACS source tree is
44 organized to construct a representation that is easy to process and check as
45 the top-level scripts expect.
46
47 The object model is rooted at a GromacsTree object.  Currently, it constructs a
48 representation of the source tree from the file system, but is otherwise mostly
49 a thin wrapper around the Doxygen XML tree.  It already adds some relations and
50 rules that come from GROMACS-specific knowledge.  In the future, more such
51 customizations will be added.
52 """
53
54 import collections
55 import os
56 import os.path
57 import re
58 import subprocess
59
60 import doxygenxml as xml
61 import reporter
62 # We import DocType directly so that it is exposed from this module as well.
63 from doxygenxml import DocType
64
def _get_api_type_for_compound(grouplist):
    """Deduce the API type of a compound from its Doxygen group membership.

    Only entries of grouplist that are xml.Group instances are considered.
    Membership in 'group_publicapi' yields DocType.public and membership in
    'group_libraryapi' yields DocType.library; if several such groups are
    present, the last one in iteration order decides.  Without any of them,
    the result is DocType.internal.
    """
    api_type_by_group = {
        'group_publicapi': DocType.public,
        'group_libraryapi': DocType.library,
    }
    api_type = DocType.internal
    for candidate in grouplist:
        if not isinstance(candidate, xml.Group):
            continue
        # Unrelated groups leave the value found so far untouched.
        # TODO: Check for multiple group membership
        api_type = api_type_by_group.get(candidate.get_name(), api_type)
    return api_type
76
class IncludedFile(object):

    """Information about an #include directive in a file.

    The object is a read-only record of one directive: which file contains
    it, on which line, which path was written in the directive, and which
    file object (if any) that path was resolved to.
    """

    def __init__(self, including_file, lineno, included_file, included_path, is_relative, is_system, line):
        """Store the details of a single #include directive.

        including_file is the file object that contains the directive,
        lineno the line number of the directive, included_file the file
        object the directive refers to (may be None), included_path the
        path as written in the directive, is_relative/is_system flags
        describing the directive, and line the full text of the source line.
        """
        self._including_file = including_file
        self._included_file = included_file
        self._included_path = included_path
        self._line_number = lineno
        self._line = line
        self._is_relative = is_relative
        self._is_system = is_system

    def __str__(self):
        """Return the included path wrapped in <> (system) or "" (otherwise)."""
        template = '<{0}>' if self._is_system else '"{0}"'
        return template.format(self._included_path)

    def __lt__(self, other):
        """Order includes by their string representation."""
        mine, theirs = str(self), str(other)
        return mine < theirs

    def is_system(self):
        """Return whether the directive used the <...> form."""
        return self._is_system

    def is_relative(self):
        """Return the is_relative flag given at construction."""
        return self._is_relative

    def get_included_path(self):
        """Return the path exactly as written in the directive."""
        return self._included_path

    def get_including_file(self):
        """Return the file object that contains the directive."""
        return self._including_file

    def get_file(self):
        """Return the file object for the included file (may be None)."""
        return self._included_file

    def get_line_number(self):
        """Return the line number of the directive in the including file."""
        return self._line_number

    def get_full_line(self):
        """Return the full source line on which this include appears.

        Trailing newline is included."""
        return self._line

    def get_reporter_location(self):
        """Return a reporter.Location pointing at the directive."""
        including_path = self._including_file.get_abspath()
        return reporter.Location(including_path, self._line_number)
126
class IncludeBlock(object):

    """Block of consecutive #include directives in a file.

    The block remembers the included-file objects added to it, together
    with the line numbers of the first and the most recently added one.
    """

    def __init__(self, first_included_file):
        """Start a new block from its first #include directive."""
        self._files = []
        self._first_line = self._last_line = first_included_file.get_line_number()
        self.add_file(first_included_file)

    def add_file(self, included_file):
        """Append an include to the block and extend the block to its line."""
        self._last_line = included_file.get_line_number()
        self._files.append(included_file)

    def get_includes(self):
        """Return the list of includes in the order they were added."""
        return self._files

    def get_first_line(self):
        """Return the line number of the first include in the block."""
        return self._first_line

    def get_last_line(self):
        """Return the line number of the most recently added include."""
        return self._last_line
149
class File(object):

    """Source/header file in the GROMACS tree.

    Besides path information, an instance accumulates what is learned about
    the file later on: the Doxygen documentation entity (set_doc_xml()), the
    git filter attribute (set_git_filter_attribute()), and the #include
    directives and defines found when the contents are read
    (scan_contents()).
    """

    def __init__(self, abspath, relpath, directory):
        """Initialize a file representation with basic information.

        The new object registers itself with the given directory object.
        """
        self._abspath = abspath
        self._relpath = relpath
        self._dir = directory
        _, suffix = os.path.splitext(abspath)
        # Only these extensions count as source files; anything else
        # (e.g. headers) does not.
        self._sourcefile = suffix in ('.c', '.cc', '.cpp', '.cu')
        self._rawdoc = None
        self._apitype = DocType.none
        self._modules = set()
        self._filter = None
        self._main_header = None
        self._includes = []
        self._include_blocks = []
        self._lines = None
        self._declared_defines = None
        self._used_defines = {}
        directory.add_file(self)

    def set_doc_xml(self, rawdoc, sourcetree):
        """Associate Doxygen documentation entity with the file.

        For documented files, the API type is deduced from the Doxygen
        groups and every group that sourcetree maps to an object is
        recorded as a module of the file.
        """
        assert self._rawdoc is None
        assert rawdoc.is_source_file() == self._sourcefile
        self._rawdoc = rawdoc
        if not rawdoc.is_documented():
            return
        doc_groups = rawdoc.get_groups()
        self._apitype = _get_api_type_for_compound(doc_groups)
        for doc_group in doc_groups:
            moduleobj = sourcetree.get_object(doc_group)
            if moduleobj:
                self._modules.add(moduleobj)

    def set_git_filter_attribute(self, filtername):
        """Set the git filter attribute associated with the file."""
        self._filter = filtername

    def set_main_header(self, included_file):
        """Set the main header file for a source file."""
        assert self.is_source_file()
        self._main_header = included_file

    def _process_include(self, lineno, is_system, includedpath, line, sourcetree):
        """Process #include directive during scan().

        A non-system include is first tried relative to the directory of
        this file; if no such path exists on disk (and for all system
        includes), the lookup is delegated to
        sourcetree.find_include_file().  The resulting IncludedFile is
        recorded in the list of includes and returned.
        """
        is_relative = False
        if not is_system:
            local_candidate = os.path.abspath(
                os.path.join(self._dir.get_abspath(), includedpath))
            is_relative = os.path.exists(local_candidate)
        if is_relative:
            target = sourcetree.get_file(local_candidate)
        else:
            target = sourcetree.find_include_file(includedpath)
        record = IncludedFile(self, lineno, target, includedpath,
                              is_relative, is_system, line)
        self._includes.append(record)
        return record

    def scan_contents(self, sourcetree, keep_contents, detect_defines):
        """Scan the file contents and initialize information based on it.

        Every #include directive is recorded and grouped into blocks; a
        block ends at the first line that is neither an #include nor
        whitespace-only.  If detect_defines is set, names following a
        define directive are collected.  If keep_contents is set, the lines
        of the file (with line endings) are retained for get_contents().
        """
        # TODO: Consider a more robust regex.
        include_pattern = re.compile(
            r'^\s*#\s*include\s+(?P<quote>["<])(?P<path>[^">]*)[">]')
        define_pattern = re.compile(r'^\s*#.*define(?:01)?\s+(\w*)')
        with open(self._abspath, 'r', encoding='utf8') as scanfile:
            lines = scanfile.read().splitlines(True)
        open_block = None
        for lineno, line in enumerate(lines, 1):
            found = include_pattern.match(line)
            if found is None:
                # Blank lines keep the current block open; any other
                # content closes it.
                if line and not line.isspace():
                    open_block = None
                continue
            included_file = self._process_include(
                lineno, found.group('quote') == '<', found.group('path'),
                line, sourcetree)
            if open_block is not None:
                open_block.add_file(included_file)
            else:
                open_block = IncludeBlock(included_file)
                self._include_blocks.append(open_block)
        if detect_defines:
            self._declared_defines = [
                found.group(1)
                for found in map(define_pattern.match, lines)
                if found
            ]
        if keep_contents:
            self._lines = lines

    def add_used_defines(self, define_file, defines):
        """Add defines used in this file.

        Used internally by find_define_file_uses()."""
        self._used_defines.setdefault(define_file, set()).update(defines)

    def get_reporter_location(self):
        """Return a reporter.Location for the file as a whole (no line)."""
        return reporter.Location(self._abspath, None)

    def is_external(self):
        """Return whether the containing directory is external."""
        return self._dir.is_external()

    def is_source_file(self):
        """Return whether the extension marks this as a source file."""
        return self._sourcefile

    def is_test_file(self):
        """Return whether the containing directory is a test directory."""
        return self._dir.is_test_directory()

    def should_includes_be_sorted(self):
        """Return whether the include directives in the file should be sorted."""
        sorting_filters = ('includesort', 'complete_formatting')
        return self._filter in sorting_filters

    def is_documented(self):
        """Return a true value if documentation is attached and documented."""
        rawdoc = self._rawdoc
        return rawdoc and rawdoc.is_documented()

    def has_brief_description(self):
        """Return a true value if the documentation has a brief description."""
        rawdoc = self._rawdoc
        return rawdoc and rawdoc.has_brief_description()

    def get_abspath(self):
        """Return the absolute path given at construction."""
        return self._abspath

    def get_relpath(self):
        """Return the relative path given at construction."""
        return self._relpath

    def get_name(self):
        """Return the file name without any directory part."""
        return os.path.basename(self._abspath)

    def get_directory(self):
        """Return the directory object that contains the file."""
        return self._dir

    def get_doc_type(self):
        """Return the documentation visibility, DocType.none without docs."""
        return self._rawdoc.get_visibility() if self._rawdoc else DocType.none

    def get_api_type(self):
        """Return the API type deduced in set_doc_xml()."""
        return self._apitype

    def api_type_is_reliable(self):
        """Return whether the API type can be trusted.

        Internal and library API types are always considered reliable;
        otherwise the answer depends on whether the file has a module and
        that module is documented.
        """
        if self._apitype in (DocType.internal, DocType.library):
            return True
        owner = self.get_module()
        return owner and owner.is_documented()

    def is_public(self):
        """Return whether the API type is DocType.public."""
        return self.get_api_type() == DocType.public

    def is_module_internal(self):
        """Return whether the file is internal to its module.

        Source files always are; other files are if their API type is at
        most DocType.internal.
        """
        return self.is_source_file() or self.get_api_type() <= DocType.internal

    def get_expected_module(self):
        """Return the module deduced from the containing directory."""
        return self._dir.get_module()

    def get_doc_modules(self):
        """Return the set of modules recorded from the documentation."""
        return self._modules

    def get_module(self):
        """Return the module of the file.

        The directory-based module takes precedence; without one, the
        documented module is used if there is exactly one.
        """
        expected = self.get_expected_module()
        if expected or len(self._modules) != 1:
            return expected
        return next(iter(self._modules))

    def get_includes(self):
        """Return the list of IncludedFile objects found by scanning."""
        return self._includes

    def get_include_blocks(self):
        """Return the list of IncludeBlock objects found by scanning."""
        return self._include_blocks

    def _get_included_files_recurse(self, result):
        """Add all directly and indirectly included files to result.

        Files already present in result are not visited again, which also
        terminates the recursion for cyclic includes.
        """
        for directive in self._includes:
            target = directive.get_file()
            if target is None or target in result:
                continue
            result.add(target)
            target._get_included_files_recurse(result)

    def get_included_files(self, recursive=False):
        """Return the set of included file objects.

        With recursive set, includes of included files are followed and
        unresolved includes are left out; otherwise only the direct includes
        are returned, and an unresolved include contributes None.
        """
        if not recursive:
            return {directive.get_file() for directive in self._includes}
        collected = set()
        self._get_included_files_recurse(collected)
        return collected

    def get_main_header(self):
        """Return the main header set with set_main_header(), if any."""
        return self._main_header

    def get_contents(self):
        """Return the retained lines, or None if contents were not kept."""
        return self._lines

    def get_declared_defines(self):
        """Return set of defines declared in this file.

        The information is only populated for selected files."""
        return self._declared_defines

    def get_used_define_files(self):
        """Return files like config.h whose defines are used in this file.

        The return value is empty if find_define_file_uses() has not been called,
        as well as for headers that declare these defines."""
        return set(self._used_defines)

    def get_used_defines(self, define_file):
        """Return set of defines used in this file for a given file like config.h.
        """
        no_defines = set()
        return self._used_defines.get(define_file, no_defines)
364
class GeneratedFile(File):

    """File that is generated from another file in the tree.

    Where a generator source file has been set, locations and declared
    defines are reported from it instead of from the generated file.
    """

    def __init__(self, abspath, relpath, directory):
        """Initialize the file without any generator source."""
        super().__init__(abspath, relpath, directory)
        self._generator_source_file = None

    def scan_contents(self, sourcetree, keep_contents, detect_defines):
        """Scan the generated file if it exists on disk.

        Define detection is never done on the generated file itself,
        regardless of detect_defines.
        """
        if not os.path.exists(self.get_abspath()):
            return
        super().scan_contents(sourcetree, keep_contents, False)

    def set_generator_source(self, sourcefile):
        """Set the file object from which this file is generated."""
        self._generator_source_file = sourcefile

    def get_generator_source(self):
        """Return the generator source file, or None if not set."""
        return self._generator_source_file

    def get_reporter_location(self):
        """Return the location of the generator source when there is one."""
        generator = self._generator_source_file
        if not generator:
            return super().get_reporter_location()
        return generator.get_reporter_location()

    def get_declared_defines(self):
        """Return the defines declared by the generator source when set."""
        generator = self._generator_source_file
        if not generator:
            return super().get_declared_defines()
        return generator.get_declared_defines()
389
class GeneratorSourceFile(File):

    """File that serves as the input for generating another file.

    The class adds no behavior of its own; it only marks such files with a
    distinct type.
    """
392
class Directory(object):

    """(Sub)directory in the GROMACS tree.

    Directories form a tree through their parent links.  A directory is a
    test directory if it is named 'tests' or lies below such a directory,
    and external if it is named 'external' or lies below such a directory.
    """

    def __init__(self, abspath, relpath, parent):
        """Initialize a directory representation with basic information.

        If a parent is given, the new directory registers itself as one of
        its subdirectories.
        """
        self._abspath = abspath
        self._relpath = relpath
        self._name = os.path.basename(abspath)
        self._parent = parent
        self._rawdoc = None
        self._module = None
        # Both properties are inherited from the parent directory.
        self._is_test_dir = bool(
            (parent and parent.is_test_directory()) or self._name == 'tests')
        self._is_external = bool(
            (parent and parent.is_external()) or self._name == 'external')
        self._subdirs = set()
        self._files = set()
        if parent:
            parent._subdirs.add(self)

    def set_doc_xml(self, rawdoc, sourcetree):
        """Associate Doxygen documentation entity with the directory."""
        assert self._rawdoc is None
        documented_path = rawdoc.get_path().rstrip('/')
        assert documented_path in (self._abspath, self._relpath)
        self._rawdoc = rawdoc

    def set_module(self, module):
        """Set the module rooted at this directory (allowed only once)."""
        assert self._module is None
        self._module = module

    def add_file(self, fileobj):
        """Register a file as residing directly in this directory."""
        self._files.add(fileobj)

    def get_name(self):
        """Return the last component of the directory path."""
        return self._name

    def get_reporter_location(self):
        """Return a reporter.Location for the directory (no line number)."""
        return reporter.Location(self._abspath, None)

    def get_abspath(self):
        """Return the absolute path given at construction."""
        return self._abspath

    def get_relpath(self):
        """Return the relative path given at construction."""
        return self._relpath

    def is_test_directory(self):
        """Return whether this is 'tests' or a directory below one."""
        return self._is_test_dir

    def is_external(self):
        """Return whether this is 'external' or a directory below one."""
        return self._is_external

    def get_module(self):
        """Return the module of this directory or of its closest ancestor.

        Returns None if neither the directory nor any ancestor has one.
        """
        if self._module:
            return self._module
        return self._parent.get_module() if self._parent else None

    def get_subdirectories(self):
        """Return the set of immediate subdirectories."""
        return self._subdirs

    def get_files(self):
        """Iterate over all files in this directory and its subdirectories.

        Files from subdirectories are produced before the files that reside
        directly in this directory.
        """
        for child in self._subdirs:
            yield from child.get_files()
        yield from self._files

    def contains(self, fileobj):
        """Check whether file is within the directory or its subdirectories."""
        ancestor = fileobj.get_directory()
        while ancestor:
            if ancestor == self:
                return True
            ancestor = ancestor._parent
        return False
473
class ModuleDependency(object):

    """Dependency between modules.

    The object collects the #include directives that make up a dependency
    on one other module, and tracks whether cycles through the dependency
    are suppressed.  The public attribute suppression_used is cleared when
    a suppression is set and set again once the suppression is queried.
    """

    def __init__(self, othermodule):
        """Initialize empty dependency object with given module as dependency."""
        self._othermodule = othermodule
        self._includedfiles = []
        # Stays True until an include from a non-test file is added.
        self._is_test_only_dependency = True
        self._cyclesuppression = None
        self.suppression_used = True

    def add_included_file(self, includedfile):
        """Add IncludedFile that is part of this dependency."""
        target_module = includedfile.get_file().get_module()
        assert target_module == self._othermodule
        includer = includedfile.get_including_file()
        if not includer.is_test_file():
            self._is_test_only_dependency = False
        self._includedfiles.append(includedfile)

    def set_cycle_suppression(self):
        """Set suppression on cycles containing this dependency."""
        self.suppression_used = False
        self._cyclesuppression = True

    def is_cycle_suppressed(self):
        """Return whether cycles containing this dependency are suppressed."""
        # Querying counts as using the suppression.
        self.suppression_used = True
        suppressed = self._cyclesuppression is not None
        return suppressed

    def is_test_only_dependency(self):
        """Return whether this dependency is only from test code."""
        return self._is_test_only_dependency

    def get_other_module(self):
        """Get module that this dependency is to."""
        return self._othermodule

    def get_included_files(self):
        """Get IncludedFile objects for the individual include dependencies."""
        return self._includedfiles
514
class Module(object):

    """Code module in the GROMACS source tree.

    Modules are specific subdirectories that host a more or less coherent
    set of routines.  Simplified, every subdirectory under src/gromacs/ is
    a different module.  This object provides that abstraction and also links
    the subdirectory to the module documentation (documented as a group in
    Doxygen) if that exists.
    """

    def __init__(self, name, rootdir):
        """Create a module with the given name, rooted at rootdir."""
        self._name = name
        self._rootdir = rootdir
        self._rawdoc = None
        self._group = None
        self._dependencies = {}

    def set_doc_xml(self, rawdoc, sourcetree):
        """Associate Doxygen documentation entity with the module.

        If the documentation belongs to exactly one group whose name starts
        with 'group_', the remainder of that name is stored as the group of
        the module.
        """
        assert self._rawdoc is None
        self._rawdoc = rawdoc
        if not rawdoc.is_documented():
            return
        doc_groups = list(rawdoc.get_groups())
        if len(doc_groups) != 1:
            return
        prefix = 'group_'
        groupname = doc_groups[0].get_name()
        if groupname.startswith(prefix):
            self._group = groupname[len(prefix):]

    def add_dependency(self, othermodule, includedfile):
        """Add #include dependency from a file in this module."""
        assert includedfile.get_file().get_module() == othermodule
        dependency = self._dependencies.get(othermodule)
        if dependency is None:
            dependency = ModuleDependency(othermodule)
            self._dependencies[othermodule] = dependency
        dependency.add_included_file(includedfile)

    def is_documented(self):
        """Return whether a documentation entity has been associated."""
        return self._rawdoc is not None

    def get_name(self):
        """Return the name of the module."""
        return self._name

    def get_root_dir(self):
        """Return the directory object at which the module is rooted."""
        return self._rootdir

    def get_files(self):
        """Return the files under the root directory of the module."""
        # TODO: Include public API convenience headers?
        return self._rootdir.get_files()

    def get_group(self):
        """Return the group deduced in set_doc_xml(), or None."""
        return self._group

    def get_dependencies(self):
        """Return the ModuleDependency objects of the module."""
        return self._dependencies.values()
569
class Namespace(object):

    """Namespace in the GROMACS source code.

    Thin wrapper around the Doxygen documentation entity of the namespace.
    """

    def __init__(self, rawdoc):
        """Wrap the given Doxygen documentation entity."""
        self._rawdoc = rawdoc

    def is_anonymous(self):
        """Return whether the documentation entity reports itself anonymous."""
        rawdoc = self._rawdoc
        return rawdoc.is_anonymous()
579
class Class(object):

    """Class/struct/union in the GROMACS source code.

    Wraps the Doxygen documentation entity of the class together with the
    set of files associated with it.
    """

    def __init__(self, rawdoc, files):
        """Wrap the documentation entity and remember its files as a set."""
        self._files = set(files)
        self._rawdoc = rawdoc

    def get_name(self):
        """Return the name reported by the documentation entity."""
        return self._rawdoc.get_name()

    def get_reporter_location(self):
        """Return the location reported by the documentation entity."""
        return self._rawdoc.get_reporter_location()

    def get_files(self):
        """Return the set of files associated with the class."""
        return self._files

    def is_documented(self):
        """Return whether the documentation entity is documented."""
        return self._rawdoc.is_documented()

    def has_brief_description(self):
        """Return whether the documentation has a brief description."""
        return self._rawdoc.has_brief_description()

    def get_doc_type(self):
        """Return documentation type (visibility) for the class.

        In addition to the actual code, this encodes GROMACS-specific logic
        of setting EXTRACT_LOCAL_CLASSES=YES only for the full documentation.
        Local classes never appear outside the full documentation, no matter
        what is their visibility.
        """
        if not self.is_documented():
            return DocType.none
        rawdoc = self._rawdoc
        return DocType.internal if rawdoc.is_local() else rawdoc.get_visibility()

    def get_file_doc_type(self):
        """Return the highest documentation type among the class's files."""
        return max(fileobj.get_doc_type() for fileobj in self._files)
619
class Member(object):

    """Member (in Doxygen terminology) in the GROMACS source tree.

    Currently, modeling is limited to the minimal set of properties that the
    checker uses.
    """

    def __init__(self, rawdoc, namespace):
        """Wrap the documentation entity and the enclosing namespace, if any."""
        self._namespace = namespace
        self._rawdoc = rawdoc

    def get_name(self):
        """Return the name reported by the documentation entity."""
        return self._rawdoc.get_name()

    def get_reporter_location(self):
        """Return the location reported by the documentation entity."""
        return self._rawdoc.get_reporter_location()

    def is_documented(self):
        """Return whether the documentation entity is documented."""
        return self._rawdoc.is_documented()

    def has_brief_description(self):
        """Return whether the documentation has a brief description."""
        return self._rawdoc.has_brief_description()

    def has_inbody_description(self):
        """Return whether the documentation has an in-body description."""
        return self._rawdoc.has_inbody_description()

    def is_visible(self):
        """Return whether the member is visible in Doxygen documentation.

        Doxygen ignores members whose parent compounds are not documented.
        However, when EXTRACT_ANON_NSPACES=ON (which is set for our full
        documentation), members of anonymous namespaces are extracted even if
        the namespace is the only parent and is not documented.
        """
        enclosing = self._namespace
        if enclosing and enclosing.is_anonymous():
            return True
        inherited = self._rawdoc.get_inherited_visibility()
        return inherited != DocType.none
658
659
660 class GromacsTree(object):
661
662     """Root object for navigating the GROMACS source tree.
663
664     On initialization, the list of files and directories is initialized by
665     walking the source tree, and modules are created for top-level
666     subdirectories.  At this point, only information that is accessible from
667     file names and paths only is available.
668
669     load_git_attributes() can be called to load attribute information from
670     .gitattributes for all the files.
671
672     scan_files() can be called to read all the files and initialize #include
673     dependencies between the files based on the information.  This is done like
674     this instead of relying on Doxygen-extracted include files to make the
675     dependency graph independent from preprocessor macro definitions
676     (Doxygen only sees those #includes that the preprocessor sees, which
677     depends on what #defines it has seen).
678
679     find_define_file_uses() can be called to find all uses of defines
680     declared in config.h and some other macro headers. In the current
681     implementation, scan_files() must have been called earlier.
682
683     load_xml() can be called to load information from Doxygen XML data in
684     the build tree (the Doxygen XML data must have been built separately).
685     """
686
687     def __init__(self, source_root, build_root, reporter):
688         """Initialize the tree object by walking the source tree."""
689         self._source_root = os.path.abspath(source_root)
690         self._build_root = os.path.abspath(build_root)
691         self._reporter = reporter
692         self._docset = None
693         self._docmap = dict()
694         self._dirs = dict()
695         self._files = dict()
696         self._modules = dict()
697         self._classes = set()
698         self._namespaces = set()
699         self._members = set()
700         self._walk_dir(os.path.join(self._source_root, 'src'))
701         for fileobj in self.get_files():
702             if fileobj and fileobj.is_source_file() and not fileobj.is_external():
703                 (basedir, name) = os.path.split(fileobj.get_abspath())
704                 (basename, ext) = os.path.splitext(name)
705                 header = self.get_file(os.path.join(basedir, basename + '.h'))
706                 if not header and ext == '.cu':
707                     header = self.get_file(os.path.join(basedir, basename + '.cuh'))
708                 if not header and fileobj.is_test_file():
709                     basedir = os.path.dirname(basedir)
710                     header = self.get_file(os.path.join(basedir, basename + '.h'))
711                     if not header:
712                         # Somewhat of a hack; currently, the tests for
713                         # analysisdata/modules/ and trajectoryanalysis/modules/
714                         # is at the top-level tests directory.
715                         # TODO: It could be clearer to split the tests so that
716                         # there would be a separate modules/tests/.
717                         header = self.get_file(os.path.join(basedir, 'modules', basename + '.h'))
718                     if not header and basename.endswith('_tests'):
719                         header = self.get_file(os.path.join(basedir, basename[:-6] + '.h'))
720                 if header:
721                     fileobj.set_main_header(header)
722         rootdir = self._get_dir(os.path.join('src', 'gromacs'))
723         for subdir in rootdir.get_subdirectories():
724             self._create_module(subdir)
725         rootdir = self._get_dir(os.path.join('src', 'testutils'))
726         self._create_module(rootdir)
727
728     def _get_rel_path(self, path):
729         assert os.path.isabs(path)
730         if path.startswith(self._build_root):
731             return os.path.relpath(path, self._build_root)
732         if path.startswith(self._source_root):
733             return os.path.relpath(path, self._source_root)
734         raise ValueError("path not under build nor source tree: {0}".format(path))
735
736     def _walk_dir(self, rootpath):
737         """Construct representation of the source tree by walking the file system."""
738         assert os.path.isabs(rootpath)
739         assert rootpath not in self._dirs
740         relpath = self._get_rel_path(rootpath)
741         self._dirs[relpath] = Directory(rootpath, relpath, None)
742         for dirpath, dirnames, filenames in os.walk(rootpath):
743             if 'refdata' in dirnames:
744                 dirnames.remove('refdata')
745             currentdir = self._dirs[self._get_rel_path(dirpath)]
746             # Loop through a copy so that we can modify dirnames.
747             for dirname in list(dirnames):
748                 fullpath = os.path.join(dirpath, dirname)
749                 if fullpath == self._build_root:
750                     dirnames.remove(dirname)
751                     continue
752                 relpath = self._get_rel_path(fullpath)
753                 self._dirs[relpath] = Directory(fullpath, relpath, currentdir)
754             extensions = ('.h', '.cuh', '.hpp', '.c', '.cc', '.cpp', '.cu', '.bm')
755             for filename in filenames:
756                 basename, extension = os.path.splitext(filename)
757                 if extension in extensions:
758                     fullpath = os.path.join(dirpath, filename)
759                     relpath = self._get_rel_path(fullpath)
760                     self._files[relpath] = File(fullpath, relpath, currentdir)
761                 elif extension == '.cmakein':
762                     extension = os.path.splitext(basename)[1]
763                     if extension in extensions:
764                         fullpath = os.path.join(dirpath, filename)
765                         relpath = self._get_rel_path(fullpath)
766                         sourcefile = GeneratorSourceFile(fullpath, relpath, currentdir)
767                         self._files[relpath] = sourcefile
768                         fullpath = os.path.join(dirpath, basename)
769                         relpath = self._get_rel_path(fullpath)
770                         fullpath = os.path.join(self._build_root, relpath)
771                         generatedfile = GeneratedFile(fullpath, relpath, currentdir)
772                         self._files[relpath] = generatedfile
773                         generatedfile.set_generator_source(sourcefile)
774                 elif extension in ('.l', '.y', '.pre'):
775                     fullpath = os.path.join(dirpath, filename)
776                     relpath = self._get_rel_path(fullpath)
777                     self._files[relpath] = GeneratorSourceFile(fullpath, relpath, currentdir)
778
779     def _create_module(self, rootdir):
780         """Create module for a subdirectory."""
781         name = 'module_' + rootdir.get_name()
782         moduleobj = Module(name, rootdir)
783         rootdir.set_module(moduleobj)
784         self._modules[name] = moduleobj
785
786     def scan_files(self, only_files=None, keep_contents=False):
787         """Read source files to initialize #include dependencies."""
788         if only_files:
789             filelist = only_files
790         else:
791             filelist = self._files.values()
792         define_files = list(self.get_checked_define_files())
793         for define_file in list(define_files):
794             if isinstance(define_file, GeneratedFile) and \
795                     define_file.get_generator_source() is not None:
796                 define_files.append(define_file.get_generator_source())
797         for fileobj in filelist:
798             if not fileobj.is_external():
799                 detect_defines = fileobj in define_files
800                 fileobj.scan_contents(self, keep_contents, detect_defines)
801                 module = fileobj.get_module()
802                 if module:
803                     for includedfile in fileobj.get_includes():
804                         otherfile = includedfile.get_file()
805                         if otherfile:
806                             othermodule = otherfile.get_module()
807                             if othermodule and othermodule != module:
808                                 module.add_dependency(othermodule, includedfile)
809
810     def load_xml(self, only_files=None):
811         """Load Doxygen XML information.
812
813         If only_files is True, XML data is not loaded for code constructs, but
814         only for files, directories, and their potential parents.
815         """
816         xmldir = os.path.join(self._build_root, 'docs', 'html', 'doxygen', 'xml')
817         self._docset = xml.DocumentationSet(xmldir, self._reporter)
818         if only_files:
819             if isinstance(only_files, collections.Iterable):
820                 filelist = [x.get_relpath() for x in only_files]
821                 self._docset.load_file_details(filelist)
822             else:
823                 self._docset.load_file_details()
824         else:
825             self._docset.load_details()
826             self._docset.merge_duplicates()
827         self._load_dirs()
828         self._load_modules()
829         self._load_files()
830         if not only_files:
831             self._load_namespaces()
832             self._load_classes()
833             self._load_members()
834
835     def _load_dirs(self):
836         """Load Doxygen XML directory information."""
837         rootdirs = self._docset.get_compounds(xml.Directory,
838                 lambda x: x.get_parent() is None)
839         for dirdoc in rootdirs:
840             self._load_dir(dirdoc, None)
841
842     def _load_dir(self, dirdoc, parent):
843         """Load Doxygen XML directory information for a single directory."""
844         path = dirdoc.get_path().rstrip('/')
845         if not os.path.isabs(path):
846             path = os.path.join(self._source_root, path)
847         relpath = self._get_rel_path(path)
848         dirobj = self._dirs.get(relpath)
849         if not dirobj:
850             dirobj = Directory(path, relpath, parent)
851             self._dirs[relpath] = dirobj
852         dirobj.set_doc_xml(dirdoc, self)
853         self._docmap[dirdoc] = dirobj
854         for subdirdoc in dirdoc.get_subdirectories():
855             self._load_dir(subdirdoc, dirobj)
856
857     def _load_modules(self):
858         """Load Doxygen XML module (group) information."""
859         moduledocs = self._docset.get_compounds(xml.Group,
860                 lambda x: x.get_name().startswith('module_'))
861         for moduledoc in moduledocs:
862             moduleobj = self._modules.get(moduledoc.get_name())
863             if not moduleobj:
864                 self._reporter.input_error(
865                         "no matching directory for module: {0}".format(moduledoc))
866                 continue
867             moduleobj.set_doc_xml(moduledoc, self)
868             self._docmap[moduledoc] = moduleobj
869
870     def _load_files(self):
871         """Load Doxygen XML file information."""
872         for filedoc in self._docset.get_files():
873             path = filedoc.get_path()
874             if not path:
875                 # In case of only partially loaded file information,
876                 # the path information is not set for unloaded files.
877                 continue
878             if not os.path.isabs(path):
879                 path = os.path.join(self._source_root, path)
880             extension = os.path.splitext(path)[1]
881             # We don't care about Markdown files that only produce pages
882             # (and fail the directory check below).
883             if extension == '.md':
884                 continue
885             dirdoc = filedoc.get_directory()
886             if not dirdoc:
887                 self._reporter.xml_assert(filedoc.get_xml_path(),
888                         "file is not in any directory in Doxygen")
889                 continue
890             relpath = self._get_rel_path(path)
891             fileobj = self._files.get(relpath)
892             if not fileobj:
893                 fileobj = File(path, relpath, self._docmap[dirdoc])
894                 self._files[relpath] = fileobj
895             fileobj.set_doc_xml(filedoc, self)
896             self._docmap[filedoc] = fileobj
897
898     def _load_namespaces(self):
899         """Load Doxygen XML namespace information."""
900         nsdocs = self._docset.get_namespaces()
901         for nsdoc in nsdocs:
902             nsobj = Namespace(nsdoc)
903             self._docmap[nsdoc] = nsobj
904             self._namespaces.add(nsobj)
905
906     def _load_classes(self):
907         """Load Doxygen XML class information."""
908         classdocs = self._docset.get_classes()
909         for classdoc in classdocs:
910             files = [self._docmap[filedoc] for filedoc in classdoc.get_files()]
911             classobj = Class(classdoc, files)
912             self._docmap[classdoc] = classobj
913             self._classes.add(classobj)
914
915     def _load_members(self):
916         """Load Doxygen XML member information."""
917         memberdocs = self._docset.get_members()
918         for memberdoc in memberdocs:
919             nsdoc = memberdoc.get_namespace()
920             nsobj = self.get_object(nsdoc)
921             memberobj = Member(memberdoc, nsobj)
922             self._docmap[memberdoc] = memberobj
923             self._members.add(memberobj)
924
925     def _get_dir(self, relpath):
926         """Get directory object for a path relative to source tree root."""
927         return self._dirs.get(relpath)
928
929     def get_file(self, path):
930         """Get file object for a path relative to source tree root."""
931         return self._files.get(self._get_rel_path(path))
932
933     def find_include_file(self, includedpath):
934         """Find a file object corresponding to an include path."""
935         for testdir in ('src', 'src/external/thread_mpi/include',
936                 'src/external/tng_io/include'):
937             testpath = os.path.join(testdir, includedpath)
938             if testpath in self._files:
939                 return self._files[testpath]
940
941     def load_git_attributes(self):
942         """Load git attribute information for files."""
943         args = ['git', 'check-attr', '--stdin', 'filter']
944         git_check_attr = subprocess.Popen(args, stdin=subprocess.PIPE,
945                 stdout=subprocess.PIPE, cwd=self._source_root)
946         filelist = '\n'.join(map(File.get_relpath, self._files.values()))
947         filters = git_check_attr.communicate(filelist.encode())[0].decode()
948         for fileinfo in filters.splitlines():
949             path, dummy, value = fileinfo.split(': ')
950             fileobj = self._files.get(path)
951             assert fileobj is not None
952             fileobj.set_git_filter_attribute(value)
953
954     def find_define_file_uses(self):
955         """Find files that use defines from config.h."""
956         # Executing git grep is substantially faster than using the define_re
957         # directly on the contents of the file in Python.
958         for define_file in self.get_checked_define_files():
959             excluded_files = set([define_file])
960             excluded_files.update(define_file.get_included_files(recursive=True))
961             all_defines = define_file.get_declared_defines()
962             args = ['git', 'grep', '-zwIF']
963             for define in all_defines:
964                 args.extend(['-e', define])
965             args.extend(['--', '*.cpp', '*.c', '*.cu', '*.h', '*.cuh'])
966             define_re = r'\b(?:' + '|'.join(all_defines)+ r')\b'
967             output = subprocess.check_output(args, cwd=self._source_root).decode()
968             for line in output.splitlines():
969                 (filename, text) = line.split('\0')
970                 fileobj = self._files.get(filename)
971                 if fileobj is not None and fileobj not in excluded_files:
972                     defines = re.findall(define_re, text)
973                     fileobj.add_used_defines(define_file, defines)
974
975     def load_cycle_suppression_list(self, filename):
976         """Load a list of edges to suppress in cycles.
977
978         These edges between modules, if present, will be marked in the
979         corresponding ModuleDependency objects.
980         """
981         with open(filename, 'r') as fp:
982             for line in fp:
983                 line = line.strip()
984                 if not line or line.startswith('#'):
985                     continue
986                 modulenames = ['module_' + x.strip() for x in line.split('->')]
987                 if len(modulenames) != 2:
988                     self._reporter.input_error(
989                             "invalid cycle suppression line: {0}".format(line))
990                     continue
991                 firstmodule = self._modules.get(modulenames[0])
992                 secondmodule = self._modules.get(modulenames[1])
993                 if not firstmodule or not secondmodule:
994                     self._reporter.input_error(
995                             "unknown modules mentioned on cycle suppression line: {0}".format(line))
996                     continue
997                 for dep in firstmodule.get_dependencies():
998                     if dep.get_other_module() == secondmodule:
999                         dep.set_cycle_suppression()
1000                         break
1001                 else:
1002                     self._reporter.cyclic_issue("unused cycle suppression: {0}".format(line))
1003
1004     def report_unused_cycle_suppressions(self, reporter):
1005         """Reports unused cycle suppressions."""
1006         for module in self.get_modules():
1007             for dep in module.get_dependencies():
1008                 if not dep.suppression_used:
1009                     reporter.cyclic_issue("unused cycle suppression: {0} -> {1}".format(module.get_name()[7:], dep.get_other_module().get_name()[7:]))
1010
1011     def get_object(self, docobj):
1012         """Get tree object for a Doxygen XML object."""
1013         if docobj is None:
1014             return None
1015         return self._docmap.get(docobj)
1016
1017     def get_files(self):
1018         """Get iterable for all files in the source tree."""
1019         return self._files.values()
1020
1021     def get_modules(self):
1022         """Get iterable for all modules in the source tree."""
1023         return self._modules.values()
1024
1025     def get_classes(self):
1026         """Get iterable for all classes in the source tree."""
1027         return self._classes
1028
1029     def get_members(self):
1030         """Get iterable for all members (in Doxygen terms) in the source tree."""
1031         return self._members
1032
1033     def get_checked_define_files(self):
1034         """Get list of files that contain #define macros whose usage needs to
1035         be checked."""
1036         return (self._files['src/config.h'],
1037                 self._files['src/gromacs/simd/simd.h'],
1038                 self._files['src/gromacs/ewald/pme_simd.h'],
1039                 self._files['src/gromacs/nbnxm/nbnxm_simd.h'])