Make doxygen assertion more helpful
[alexxy/gromacs.git] / docs / doxygen / doxygenxml.py
1 #!/usr/bin/env python3
2 #
3 # This file is part of the GROMACS molecular simulation package.
4 #
5 # Copyright (c) 2014,2015,2016,2018,2019,2020,2021, by the GROMACS development team, led by
6 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 # and including many others, as listed in the AUTHORS file in the
8 # top-level source directory and at http://www.gromacs.org.
9 #
10 # GROMACS is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU Lesser General Public License
12 # as published by the Free Software Foundation; either version 2.1
13 # of the License, or (at your option) any later version.
14 #
15 # GROMACS is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 # Lesser General Public License for more details.
19 #
20 # You should have received a copy of the GNU Lesser General Public
21 # License along with GROMACS; if not, see
22 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24 #
25 # If you want to redistribute modifications to GROMACS, please
26 # consider that scientific software is very special. Version
27 # control is crucial - bugs must be traceable. We will be happy to
28 # consider code for inclusion in the official distribution, but
29 # derived work must not be called official GROMACS. Details are found
30 # in the README & COPYING files - if they are missing, get the
31 # official version at http://www.gromacs.org.
32 #
33 # To help us fund GROMACS development, we humbly ask that you cite
34 # the research papers on the package. Check out http://www.gromacs.org.
35
36 """Doxygen XML output parser.
37
38 This module implements a parser for the Doxygen XML output, converting it into
39 an object model that can be used to navigate the documentation.  It also uses
40 knowledge from how Doxygen works to provide access to things like visibility of
41 individual member documentation (e.g., based on what is the visibility of its
42 parent compound objects).
43
44 The object model is rooted at a DocumentationSet object.  Each documented
45 entity is modeled as an Entity, and this has subclasses Member and Compound to
46 correspond to the two categories of items that Doxygen handles.  These classes
47 are further subclassed to match each kind of entity that Doxygen produces.
48 Only kinds produced by Doxygen from C/C++ code are modeled.  Everything else
49 is ignored after a warning.
50
51 Currently the member entities are not completely parsed from the XML files, and
52 the interface may need additional work to provide convenient access to all
53 member types and their common properties.  For now, focus is in modeling the
54 compound entities.
55
56 The implementation is mostly independent of any GROMACS-specific rules, except
57 for the following:
58  - DocType.library is a GROMACS-specific construct that is deduced from the
59    contents of the detailed description (presence of a \libinternal command in
60    the Doxygen comment triggers it).
61  - DocType.internal is deduced from the presence of a \internal command that
62    covers the whole detailed description.
63  - List of extensions for determining whether a file is a source file only
64    contains extensions actually used by GROMACS.
65 It would be possible to move these out from this file, but that would require
66 exposing the XML representation for the descriptions, which is not nice either.
67
68 The module can also be run as a script that can dump out different parts of the
69 object model.  This can be used to debug the parser, as well as check what is
70 actually in the XML documentation.
71 """
72
73 import os.path
74 import xml.etree.ElementTree as ET
75 import functools
76
77 import reporter
78
79 #####################################################################
80 # Helper functions and classes
81
82 def _show_list(title, objlist):
83     """Helper function for formatting a list of objects for debug output."""
84     if objlist:
85         print('{0}:'.format(title))
86         for obj in objlist:
87             print('  ', obj)
88
@functools.total_ordering
class DocType(object):

    """Documentation visibility in the generated documentation.

    Each instance wraps an integer index into _names.  Instances compare by
    that integer, so a larger value means a more visible documentation type;
    functools.total_ordering derives the remaining comparison operators from
    __eq__ and __lt__.
    """

    # Mapping to string representations for the internal integer values
    _names = ['undocumented', 'internal', 'library', 'public']

    def __init__(self, value):
        """Initialize a DocType instance.

        DocType.{none,internal,library,public} should be used outside the class
        instead of calling the constructor.
        """
        self._value = value

    def __str__(self):
        """Return string representation for the documentation type."""
        return self._names[self._value]

    def __eq__(self, other):
        """Return whether both documentation types have the same visibility.

        Comparison against anything that is not a DocType yields
        NotImplemented, so that e.g. `doctype == None` evaluates to False
        instead of raising AttributeError.
        """
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value == other._value

    def __lt__(self, other):
        """Order documentation types in the order of visibility."""
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value < other._value

    def __hash__(self):
        """Hash consistently with __eq__.

        Defining __eq__ alone would make instances unhashable in Python 3.
        """
        return hash(self._value)

# Static values for documentation types.
DocType.none = DocType(0)
DocType.internal = DocType(1)
DocType.library = DocType(2)
DocType.public = DocType(3)
122
class Location(object):

    """Main (declaration) location of a Doxygen entity.

    Holds the file, line and column read from a <location> tag in Doxygen
    XML.  It is used on its own where no body location is expected, and as
    one half of a LocationWithBody otherwise.
    """

    def __init__(self, elem):
        """Read the location attributes from a <location> element.

        The line is converted to an int; the column is stored exactly as the
        string found in the XML.
        """
        attrs = elem.attrib
        self.filepath = attrs['file']
        self.line = int(attrs['line'])
        self.column = attrs['column']

    def __str__(self):
        """Return the location as 'file:line' (column omitted)."""
        return '{0}:{1}'.format(self.filepath, self.line)

    def get_reporter_location(self):
        """Return a reporter.Location for the same file and line."""
        return reporter.Location(self.filepath, self.line)

    def get_full_string(self):
        """Return the location as 'file:line:column'."""
        parts = (self.filepath, self.line, self.column)
        return '{0}:{1}:{2}'.format(*parts)
146
@functools.total_ordering
class BodyLocation(object):

    """Location of the body (definition) of a Doxygen entity.

    Parses the bodyfile/bodystart/bodyend attributes of a <location> tag in
    Doxygen XML.  Not every entity has these attributes, so this class is
    only instantiated through LocationWithBody, which deals with the
    optional case.

    Instances support comparison and hashing so that they can serve as
    dictionary keys in DocumentationSet.merge_duplicates().
    """

    def __init__(self, elem):
        """Read the body location attributes from a <location> element."""
        attrs = elem.attrib
        self.filepath = attrs['bodyfile']
        self.startline = int(attrs['bodystart'])
        self.endline = int(attrs['bodyend'])

    def __eq__(self, other):
        """Two body locations are equal when file and both lines match."""
        mine = (self.filepath, self.startline, self.endline)
        theirs = (other.filepath, other.startline, other.endline)
        return mine == theirs

    def __lt__(self, other):
        """Order by file path, then start line, then end line.

        A file path of None sorts before any actual path.
        """
        if self.filepath == other.filepath:
            if self.startline == other.startline:
                return self.endline < other.endline
            return self.startline < other.startline
        # The paths differ from here on, so at most one of them is None.
        if other.filepath is None:
            return False
        if self.filepath is None:
            return True
        return self.filepath < other.filepath

    def __hash__(self):
        """Combine the hashes of the same fields that __eq__ compares."""
        return hash(self.filepath) ^ hash(self.startline) ^ hash(self.endline)

    def __str__(self):
        """Return the body location as 'file:startline'."""
        return '{0}:{1}'.format(self.filepath, self.startline)

    def get_full_string(self):
        """Return 'file:start-end', or 'file:start' if the end line is negative."""
        if self.endline >= 0:
            return '{0}:{1}-{2}'.format(self.filepath, self.startline, self.endline)
        return self.__str__()
194
class LocationWithBody(object):

    """Location for a Doxygen entity that can have a body location.

    Wraps the main Location and, when the <location> element carries a
    'bodyfile' attribute, a BodyLocation as well.  Without that attribute
    the body location is None.
    """

    def __init__(self, elem):
        """Initialize location from a <location> element."""
        self._location = Location(elem)
        if 'bodyfile' in elem.attrib:
            self._bodylocation = BodyLocation(elem)
        else:
            self._bodylocation = None

    def __str__(self):
        if self._bodylocation is None:
            return '{0} (no body)'.format(self._location)
        return '{0} / {1}'.format(self._location, self._bodylocation)

    def get_reporter_location(self):
        """Return reporter location for this location.

        All issues are reported at the main location, which should match with
        the declaration, where most of the documentation typically is.
        """
        return self._location.get_reporter_location()

    def get_location(self):
        """Return the main Location."""
        return self._location

    def get_body_location(self):
        """Return the BodyLocation, or None if the entity has no body."""
        return self._bodylocation

    def has_same_body_location(self):
        """Check whether main location matches body location.

        If the main location is different, then it likely points to the
        declaration of the function.

        Returns False when there is no body location at all: there is
        nothing for the main location to match, and dereferencing the
        missing body location would raise AttributeError.
        """
        body = self._bodylocation
        if body is None:
            return False
        return (self._location.filepath == body.filepath and
                self._location.line == body.startline)
239
class MemberSection(object):

    """Section of members within a compound entity.

    Keeps the section kind and the members added to it, in insertion order.
    """

    def __init__(self, kind):
        self._members = []
        self._kind = kind

    def __str__(self):
        """Return the section kind."""
        return self._kind

    def add_member(self, member):
        """Append a member to the end of the section."""
        self._members.append(member)

    def replace_member(self, old, new):
        """Replace the first occurrence of old with new.

        Does nothing if old is not in this section.
        """
        if old not in self._members:
            return
        index = self._members.index(old)
        self._members[index] = new
260
261 #####################################################################
262 # Documentation entities
263
class Entity(object):

    """Doxygen documentation entity.

    This class represents common properties of an entity that can contain
    Doxygen documentation: its name and ID, which kinds of description it
    has, and the visibility (DocType) deduced from those descriptions.
    """

    def __init__(self, name, refid):
        self._docset = None
        self._name = name
        self._id = refid
        self._has_brief_description = False
        self._has_detailed_description = False
        self._has_inbody_description = False
        # Stays DocType.none until _process_descriptions() finds documentation.
        self._visibility = DocType.none

    def __str__(self):
        return self._name

    def _get_reporter(self):
        """Return reporter to use for parsing issues."""
        return self._docset.get_reporter()

    def set_documentation_set(self, docset):
        """Set the documentation set this entity belongs to.

        The documentation set parent provides access to a common reporter
        object, and also allows the entity to resolve references to other
        entities while loading XML information.

        May only be called once per entity.
        """
        assert self._docset is None
        self._docset = docset

    def get_id(self):
        return self._id

    def get_name(self):
        return self._name

    def get_reporter_location(self):
        """Return a reporter location built from the entity name only."""
        return reporter.Location('<{0}>'.format(self._name), None)

    def get_visibility(self):
        return self._visibility

    def is_documented(self):
        """Return whether any brief or detailed documentation was found."""
        return self._visibility != DocType.none

    def has_brief_description(self):
        return self._has_brief_description

    def has_inbody_description(self):
        return self._has_inbody_description

    def _process_descriptions(self, briefelem, detailselem, inbodyelem):
        """Deduce description flags and visibility from description elements.

        Each argument is the corresponding XML description element, or None
        if the element was not present.  An element with no children counts
        as an empty description.
        """
        # Named differently from the imported 'reporter' module on purpose.
        issues = self._get_reporter()
        if briefelem is not None and len(briefelem) > 0:
            self._has_brief_description = True
            self._visibility = DocType.public
        if detailselem is not None and len(detailselem) > 0:
            self._visibility = DocType.public
            # Gromacs-specific:
            # \internal is used at the beginning of a comment block to
            # mark the block internal to the module.
            # \libinternal is used similarly, and inserts custom XML
            # elements.
            if detailselem[0].tag == 'internal':
                if len(detailselem) == 1:
                    self._visibility = DocType.internal
                else:
                    # TODO: Should we also check if internal appears elsewhere?
                    # Raw string: '\i' is not a valid escape sequence.
                    issues.doc_note(self, r'\internal does not cover whole documentation')
            if detailselem[0].find('libinternal') is not None:
                if self._visibility == DocType.public:
                    self._visibility = DocType.library
                else:
                    # Raw string: '\l' and '\i' are not valid escape sequences.
                    issues.doc_error(self, r'\libinternal should not be used inside \internal')
            self._has_detailed_description = True
        if inbodyelem is not None:
            self._has_inbody_description = (len(inbodyelem) > 0)

    def show_base(self):
        """Format information for common properties.

        This is called from subclass show() methods to show base information
        about the entity.
        """
        print('ID:         {0}'.format(self._id))
        print('Name:       {0}'.format(self._name))
        print('Location:   {0}'.format(self.get_reporter_location()))
        doctype = []
        if self._has_brief_description:
            doctype.append('brief')
        if self._has_detailed_description:
            doctype.append('details')
        if self._has_inbody_description:
            doctype.append('in-body')
        if not doctype:
            doctype.append('none')
        print('Doc:        {0}'.format(', '.join(doctype)))
        print('Visibility: {0}'.format(self._visibility))
366
367 # Member entities
368
class Member(Entity):

    """Member entity.

    In Doxygen, a member entity is an entity such as a function or an enum that
    cannot contain other documented entities (an enum is a slight exception, as
    enum values are still nested within the enum member).  A member always
    belongs to one (or more) compounds, which means that the detailed
    documentation for the member appears on the documentation page for that
    compound.  If none of the parent compounds are documented, the member
    doesn't appear anywhere, even if it is documented.

    Member information is loaded from a parent compound's XML file.  If there
    is more than one parent, the first one encountered will be used
    (presumably, Doxygen duplicates the information into each XML file).
    """

    def __init__(self, name, refid):
        Entity.__init__(self, name, refid)
        # All compounds that contain this member, regardless of their type.
        self._parents = set()
        # At most one parent of each of these types is allowed
        # (enforced in add_parent_compound()).
        self._class = None
        self._namespace = None
        self._group = None
        # A member can be listed in any number of files.
        self._files = set()
        # LocationWithBody, set once details have been loaded.
        self._location = None
        # Other members merged into this one by merge_definition().
        self._alternates = set()
        self._loaded = False
        # TODO: Move to Entity?
        self._xmlpath = None

    def add_parent_compound(self, compound):
        """Add a compound that contains this member.

        The compound must be a Class, Namespace, File or Group.  Only files
        may be added more than once; a second class, namespace or group
        triggers an assertion naming both the existing parent and this
        member.
        """
        self._parents.add(compound)
        if isinstance(compound, Class):
            assert self._class is None, 'Class \"{0}\" was already added. Maybe you have two entities with the same name {1}'.format(self._class, self._name)
            self._class = compound
        elif isinstance(compound, Namespace):
            assert self._namespace is None, 'Namespace \"{0}\" was already added. Maybe you have two entities with the same name {1}'.format(self._namespace, self._name)
            self._namespace = compound
        elif isinstance(compound, File):
            self._files.add(compound)
        elif isinstance(compound, Group):
            assert self._group is None, 'Group \"{0}\" was already added. Maybe you have two entities with the same name {1}'.format(self._group, self._name)
            self._group = compound
        else:
            assert False, 'Unexpected parent compound type {0} for member {1}'.format(type(compound).__name__, self._name)

    def merge_definition(self, definition):
        """Merge another member into this.

        Only members outside classes, with identical group and namespace,
        can be merged.  The parents and files of the other member are added
        to this one, and the other member is remembered as an alternate.

        See DocumentationSet.merge_duplicates().
        """
        assert self._class is None
        assert definition._class is None
        assert self._group == definition._group
        assert self._namespace == definition._namespace
        self._parents.update(definition._parents)
        self._files.update(definition._files)
        self._alternates.add(definition)

    def load_details_from_element(self, rootelem, xmlpath):
        """Load details for the member from a given XML element.

        This method is called when encountering member definitions while
        processing a compound XML file to load the information for that member.
        It processes common properties for a member, and delegates other
        elements to _load_element().

        Only the first call has any effect; later calls return immediately.
        """
        if self._loaded:
            # TODO: It would be nice to verify that the same information
            # is present in all instances
            return
        self._xmlpath = xmlpath
        # TODO: Process the attributes
        # Named differently from the imported 'reporter' module on purpose.
        issues = self._get_reporter()
        briefelem = None
        detailselem = None
        inbodyelem = None
        for elem in rootelem:
            if elem.tag == 'name':
                if elem.text != self.get_name():
                    issues.xml_assert(xmlpath,
                            "member name mismatch: '{0}' (in index.xml) vs. '{1}'".format(
                                self.get_name(), elem.text))
            elif elem.tag == 'briefdescription':
                briefelem = elem
            elif elem.tag == 'detaileddescription':
                detailselem = elem
            elif elem.tag == 'inbodydescription':
                # TODO: in-body description is probably only possible for
                # functions; move it there.
                inbodyelem = elem
            elif elem.tag == 'location':
                self._location = LocationWithBody(elem)
            else:
                if not self._load_element(elem):
                    # TODO Process the rest of the elements so that we can check this
                    #reporter.xml_assert(xmlpath,
                    #        "unknown member child element '{0}'".format(elem.tag))
                    pass
        self._process_descriptions(briefelem, detailselem, inbodyelem)
        self._loaded = True

    def _load_element(self, element):
        """Load data from a child XML element.

        This method is called for all XML elements under the <memberdef>
        element that are not handled directly by the Member class.
        Derived classes should return True if they process the element.
        """
        return False

    def _get_raw_location(self):
        """Return the LocationWithBody object associated with this member.

        This is necessary so that EnumValue can override it to report a
        non-empty location: Doxygen doesn't provide any location for
        <enumvalue>.
        """
        return self._location

    def get_reporter_location(self):
        return self._get_raw_location().get_reporter_location()

    def get_location(self):
        """Return main location for the member.

        This typically corresponds to the declaration.
        """
        return self._get_raw_location().get_location()

    def get_body_location(self):
        """Return location of the body for the member.

        Some types of members do not have a body location, in which case this
        returns None.
        """
        return self._get_raw_location().get_body_location()

    def has_same_body_location(self):
        """Check whether the main location is the same as body location."""
        return self._get_raw_location().has_same_body_location()

    def get_namespace(self):
        return self._namespace

    def get_parent_compounds(self):
        return self._parents

    def get_inherited_visibility(self):
        """Return the highest visibility among the parent compounds."""
        return max([parent.get_visibility() for parent in self._parents])

    def show(self):
        """Print debug information about the member."""
        self.show_base()
        if self._alternates:
            idlist = [x.get_id() for x in self._alternates]
            print('Alt. IDs:   {0}'.format(', '.join(idlist)))
        print('Parent vis: {0}'.format(self.get_inherited_visibility()))
        print('Location:   {0}'.format(self.get_location().get_full_string()))
        # get_body_location() returns None for members without a body, so
        # it cannot be dereferenced unconditionally.
        bodyloc = self.get_body_location()
        if bodyloc is None:
            print('Body loc:   (no body)')
        else:
            print('Body loc:   {0}'.format(bodyloc.get_full_string()))
        _show_list('Parents', self._parents)
528         _show_list('Parents', self._parents)
529
class Define(Member):

    """Member entity of the Define kind.

    Adds no behavior to Member; the separate type only distinguishes this
    kind of member from the others (presumably Doxygen 'define' members;
    the kind-to-class mapping is not visible here, confirm where members
    are created).
    """
532
class Variable(Member):

    """Member entity of the Variable kind.

    Adds no behavior to Member; the separate type only distinguishes this
    kind of member from the others (presumably Doxygen 'variable' members;
    the kind-to-class mapping is not visible here, confirm where members
    are created).
    """
535
class Typedef(Member):

    """Member entity of the Typedef kind.

    Adds no behavior to Member; the separate type only distinguishes this
    kind of member from the others (presumably Doxygen 'typedef' members;
    the kind-to-class mapping is not visible here, confirm where members
    are created).
    """
538
class Enum(Member):

    """Enum member that also keeps track of its enum value members."""

    def __init__(self, name, refid):
        super().__init__(name, refid)
        self._values = set()

    def _load_element(self, elem):
        """Load an <enumvalue> child; other elements are left unprocessed.

        Returns True for every <enumvalue> element and False for anything
        else.
        """
        if elem.tag != 'enumvalue':
            return False
        value_id = elem.attrib['id']
        # Doxygen seems to sometimes assign the same ID to a singleton enum
        # value (this already triggers a warning in loading index.xml).
        # Such a value is treated as processed, but not recorded.
        if value_id != self.get_id():
            value = self._docset.get_member(value_id)
            value.set_enum(self)
            value.load_details_from_element(elem, self._xmlpath)
            self._values.add(value)
        return True

    def get_values(self):
        """Return the set of enum value members loaded so far."""
        return self._values
560
class EnumValue(Member):

    """Enum value member; takes its location from the enum it belongs to."""

    def __init__(self, name, refid):
        super().__init__(name, refid)
        self._enum = None

    def set_enum(self, member):
        """Set the enum this value belongs to; allowed only once."""
        assert self._enum is None
        self._enum = member

    def _get_raw_location(self):
        """Return the raw location of the owning enum instead of our own."""
        owner = self._enum
        return owner._get_raw_location()
572
class Function(Member):

    """Member entity of the Function kind.

    Adds no behavior to Member; the separate type only distinguishes this
    kind of member from the others (presumably Doxygen 'function' members;
    the kind-to-class mapping is not visible here, confirm where members
    are created).
    """
575
class FriendDeclaration(Member):

    """Member entity of the FriendDeclaration kind.

    Adds no behavior to Member; the separate type only distinguishes this
    kind of member from the others (presumably Doxygen 'friend' members;
    the kind-to-class mapping is not visible here, confirm where members
    are created).
    """
578
579 # Compound entities
580
class Compound(Entity):

    """Compound entity.

    A compound in Doxygen is an entity with a documentation page of its own
    that can hold other documented entities, i.e., members or further
    compounds.  Files and classes are examples.  A compound shows up in the
    documentation even when the compound containing it is undocumented.

    The member list is filled in while the XML index file is read.
    Everything else comes from the compound's own XML file, which describes
    the compound, references the compounds it contains, and carries the
    details of all its members.
    """

    def __init__(self, name, refid):
        Entity.__init__(self, name, refid)
        # Members keyed by their ID.
        self._members = dict()
        # Compounds referenced through <inner*> elements.
        self._children = set()
        # MemberSection objects in the order they appear in the XML file.
        self._sections = []
        # Groups that contain this compound.
        self._groups = set()
        self._loaded = False

    def get_xml_path(self):
        """Return path to the details XML file for this compound."""
        xmlroot = self._docset.get_xmlroot()
        return os.path.join(xmlroot, self.get_id() + '.xml')

    def add_member(self, member):
        """Add a contained member."""
        self._members[member.get_id()] = member

    def add_group(self, compound):
        """Add a group (a compound entity) that contains this entity."""
        self._groups.add(compound)

    def replace_member(self, old, new):
        """Substitute new for old in the member map and in all sections.

        The new member is stored under the ID of the old one.  Raises
        ValueError if old is not a member, or if new already is one.
        """
        old_id = old.get_id()
        if old_id not in self._members:
            raise ValueError("Trying to replace a non-existent member")
        if new.get_id() in self._members:
            raise ValueError("Trying to replace with an existing member")
        self._members[old_id] = new
        for section in self._sections:
            section.replace_member(old, new)

    def load_details(self):
        """Load details for the compound from its details XML file.

        Handles the properties shared by all compounds.  Inner compound
        references go to the _load_inner_*() methods, every member found in
        the file is loaded with Member.load_details_from_element(), and any
        other element is offered to _load_element().

        Only the first call does anything; later calls return immediately.
        """
        if self._loaded:
            return
        issues = self._get_reporter()
        xmlpath = self.get_xml_path()
        root = ET.parse(xmlpath).getroot()
        if len(root) > 1:
            issues.xml_assert(xmlpath, "more than one compound in a file")
        compounddef = root[0]
        if compounddef.tag != 'compounddef':
            issues.xml_assert(xmlpath, "expected <compounddef> as the first tag")
            return
        # Elements that are recognized, but carry nothing we need.
        ignored_tags = ('includes', 'includedby', 'incdepgraph',
                'invincdepgraph', 'inheritancegraph', 'collaborationgraph',
                'programlisting', 'templateparamlist', 'listofallmembers')
        # Handlers for <innerXXX> elements, keyed by the XXX part.
        inner_loaders = {
            'file': self._load_inner_file,
            'dir': self._load_inner_dir,
            'group': self._load_inner_group,
            'namespace': self._load_inner_namespace,
            'class': self._load_inner_class,
        }
        briefelem = None
        detailselem = None
        # Members known from the index that no section has listed yet.
        unseen = set(self._members.values())
        for child in compounddef:
            tag = child.tag
            if tag == 'compoundname':
                if child.text != self.get_name():
                    issues.xml_assert(xmlpath,
                            "compound name mismatch: '{0}' (in index.xml) vs. '{1}'"
                            .format(self.get_name(), child.text))
            elif tag == 'briefdescription':
                briefelem = child
            elif tag == 'detaileddescription':
                detailselem = child
            elif tag in ignored_tags:
                continue
            elif tag.startswith('inner'):
                refid = child.attrib['refid']
                reftype = tag[5:]
                # TODO: Handle 'prot' attribute?
                refcompound = self._docset.get_compound(refid)
                self._children.add(refcompound)
                loader = inner_loaders.get(reftype)
                if loader is None:
                    issues.xml_assert(xmlpath,
                            "unknown inner compound type '{0}'".format(reftype))
                else:
                    loader(refcompound)
            elif tag == 'sectiondef':
                # TODO: Handle header and description elements
                section = MemberSection(child.attrib['kind'])
                self._sections.append(section)
                for memberelem in child.iter('memberdef'):
                    member = self._members[memberelem.attrib['id']]
                    member.load_details_from_element(memberelem, xmlpath)
                    section.add_member(member)
                    unseen.discard(member)
                    # Enum values need special handling, but are not worth
                    # extra generalization.
                    if isinstance(member, Enum):
                        unseen.difference_update(member.get_values())
            elif not self._load_element(child):
                issues.xml_assert(xmlpath,
                        "unknown compound child element '{0}'".format(tag))
        if unseen:
            issues.xml_assert(xmlpath, 'members without section')
        self._process_descriptions(briefelem, detailselem, None)
        self._loaded = True

    def _unexpected_inner_compound(self, typename, compound):
        """Report a parsing error for an unexpected inner compound reference."""
        issues = self._get_reporter()
        xmlpath = self.get_xml_path()
        message = "unexpected inner {0}: {1}".format(typename, compound)
        issues.xml_assert(xmlpath, message)

    def _load_inner_file(self, compound):
        """Process a reference to an inner file.

        The base implementation reports the reference as unexpected.
        Derived classes should override the method if the compound type can
        contain files as nested compounds.
        """
        self._unexpected_inner_compound("file", compound)

    def _load_inner_dir(self, compound):
        """Process a reference to an inner directory.

        The base implementation reports the reference as unexpected.
        Derived classes should override the method if the compound type can
        contain directories as nested compounds.
        """
        self._unexpected_inner_compound("dir", compound)

    def _load_inner_group(self, compound):
        """Process a reference to an inner group.

        The base implementation reports the reference as unexpected.
        Derived classes should override the method if the compound type can
        contain groups as nested compounds.
        """
        self._unexpected_inner_compound("group", compound)

    def _load_inner_namespace(self, compound):
        """Process a reference to an inner namespace.

        The base implementation reports the reference as unexpected.
        Derived classes should override the method if the compound type can
        contain namespaces as nested compounds.
        """
        self._unexpected_inner_compound("namespace", compound)

    def _load_inner_class(self, compound):
        """Process a reference to an inner class.

        The base implementation reports the reference as unexpected.
        Derived classes should override the method if the compound type can
        contain classes as nested compounds.
        """
        self._unexpected_inner_compound("class", compound)

    def _load_element(self, element):
        """Load data from a child XML element.

        Called for every XML element under <compounddef> that the Compound
        class does not handle itself.  Derived classes should return True
        if they process the element; the base implementation processes
        nothing.
        """
        return False

    def get_groups(self):
        """Return the set of groups that contain this compound."""
        return self._groups

    def show_base(self):
        """Format information for common properties.

        Extends Entity.show_base() with the properties shared by all
        compounds.
        """
        Entity.show_base(self)
        if not self._groups:
            return
        group_names = ', '.join(str(group) for group in self._groups)
        print('Groups:   {0}'.format(group_names))

    def show_members(self):
        """Show list of members.

        Intended for the show() methods of derived classes: prints each
        member section followed by its members.
        """
        for section in self._sections:
            print('Member section: {0}'.format(section))
            for member in section._members:
                print('  ', member)
787
class File(Compound):

    """Compound that represents a single file."""

    def __init__(self, name, refid):
        Compound.__init__(self, name, refid)
        # Both are filled in from the <location> element by _load_element().
        self._path = None
        self._is_source_file = None
        # Assigned through set_directory().
        self._directory = None
        # Nested compounds collected by the _load_inner_*() methods below.
        self._namespaces = set()
        self._classes = set()

    def _load_inner_class(self, compound):
        """Register a class contained in this file (in both directions)."""
        compound.add_file(self)
        self._classes.add(compound)

    def _load_inner_namespace(self, compound):
        """Register a namespace contained in this file (in both directions)."""
        compound.add_file(self)
        self._namespaces.add(compound)

    def _load_element(self, elem):
        """Read the file path from a <location> element.

        Returns True if the element was a <location> element, False
        otherwise.
        """
        if elem.tag != 'location':
            return False
        self._path = elem.attrib['file']
        # Files with these extensions are treated as source files.
        suffix = os.path.splitext(self._path)[1]
        self._is_source_file = suffix in ('.c', '.cpp', '.cu')
        return True

    def set_directory(self, directory):
        """Set the directory object associated with this file."""
        self._directory = directory

    def get_reporter_location(self):
        """Return a reporter location built from the file path."""
        return reporter.Location(self._path, None)

    def get_path(self):
        """Return the path read from the <location> element (or None)."""
        return self._path

    def get_directory(self):
        """Return the directory set with set_directory() (or None)."""
        return self._directory

    def is_source_file(self):
        """Return whether the extension is one of .c, .cpp or .cu.

        The value is None until a <location> element has been loaded.
        """
        return self._is_source_file

    def show(self):
        """Print the file properties, nested compounds and members."""
        self.show_base()
        properties = (
            ('Path:      {0}', self._path),
            ('Directory: {0}', self._directory),
            ('Source:    {0}', self._is_source_file),
        )
        for template, value in properties:
            print(template.format(value))
        _show_list('Namespaces', self._namespaces)
        _show_list('Classes', self._classes)
        self.show_members()
836
class Directory(Compound):

    """Compound that represents a directory."""

    def __init__(self, name, refid):
        Compound.__init__(self, name, refid)
        # Filled in from the <location> element by _load_element().
        self._path = None
        # Set by the parent directory in its _load_inner_dir().
        self._parent = None
        # Nested compounds collected by the _load_inner_*() methods below.
        self._files = set()
        self._subdirs = set()

    def _load_inner_file(self, compound):
        """Register a file in this directory and set its directory to self."""
        compound.set_directory(self)
        self._files.add(compound)

    def _load_inner_dir(self, compound):
        """Register a subdirectory and mark this directory as its parent."""
        compound._parent = self
        self._subdirs.add(compound)

    def _load_element(self, elem):
        """Read the directory path from a <location> element.

        Returns True if the element was a <location> element, False
        otherwise.
        """
        if elem.tag != 'location':
            return False
        self._path = elem.attrib['file']
        return True

    def get_reporter_location(self):
        """Return a reporter location built from the directory path."""
        return reporter.Location(self._path, None)

    def get_path(self):
        """Return the path read from the <location> element (or None)."""
        return self._path

    def get_parent(self):
        """Return the parent directory object (or None if not set)."""
        return self._parent

    def get_subdirectories(self):
        """Return the set of registered subdirectories."""
        return self._subdirs

    def show(self):
        """Print the directory properties and its contents."""
        self.show_base()
        print('Path:      {0}'.format(self._path))
        parent = self._parent
        if parent:
            print('Parent:    {0}'.format(parent))
        _show_list('Subdirectories', self._subdirs)
        _show_list('Files', self._files)
878
class Group(Compound):

    """Compound that represents a Doxygen group."""

    def __init__(self, name, refid):
        Compound.__init__(self, name, refid)
        # Filled in from the <title> element by _load_element().
        self._title = None
        # Nested compounds collected by the _load_inner_*() methods below.
        self._files = set()
        self._nestedgroups = set()
        self._namespaces = set()
        self._classes = set()

    # Doxygen 1.8.5 doesn't seem to put the directories into the XML output,
    # even though they are in the HTML output as group members, so there is
    # no _load_inner_dir() here.

    def _load_inner_file(self, compound):
        """Register a file as a member of this group (in both directions)."""
        compound.add_group(self)
        self._files.add(compound)

    def _load_inner_group(self, compound):
        """Register a nested group (in both directions)."""
        compound.add_group(self)
        self._nestedgroups.add(compound)

    def _load_inner_namespace(self, compound):
        """Register a namespace in this group (in both directions)."""
        compound.add_group(self)
        self._namespaces.add(compound)

    def _load_inner_class(self, compound):
        """Register a class in this group (in both directions)."""
        compound.add_group(self)
        self._classes.add(compound)

    def _load_element(self, elem):
        """Read the group title from a <title> element.

        Returns True if the element was a <title> element, False otherwise.
        """
        if elem.tag != 'title':
            return False
        self._title = elem.text
        return True

    def show(self):
        """Print the group title, its inner compounds and its members."""
        self.show_base()
        print('Title:     {0}'.format(self._title))
        print('Inner compounds:')
        for inner in self._children:
            print('  ', inner)
        self.show_members()
920
class Namespace(Compound):

    """Compound that represents a namespace."""

    def __init__(self, name, refid):
        Compound.__init__(self, name, refid)
        # Filled in from the <location> element by _load_element().
        self._doclocation = None
        # Set by the enclosing namespace in its _load_inner_namespace().
        self._parent = None
        # Files that reported this namespace through add_file().
        self._files = set()
        # Nested compounds collected by the _load_inner_*() methods below.
        self._innernamespaces = set()
        self._classes = set()

    def _load_inner_namespace(self, compound):
        """Register a nested namespace and mark this one as its parent."""
        compound._parent = self
        self._innernamespaces.add(compound)

    def _load_inner_class(self, compound):
        """Register a class and set this namespace as its namespace."""
        compound.set_namespace(self)
        self._classes.add(compound)

    def _load_element(self, elem):
        """Read the documentation location from a <location> element.

        Returns True if the element was a <location> element, False
        otherwise.
        """
        if elem.tag != 'location':
            return False
        self._doclocation = Location(elem)
        return True

    def add_file(self, compound):
        """Record a file that contains this namespace."""
        self._files.add(compound)

    def get_reporter_location(self):
        """Return the reporter location of the documentation location.

        Requires that a <location> element has been loaded.
        """
        doclocation = self._doclocation
        return doclocation.get_reporter_location()

    def is_anonymous(self):
        """Return whether the name contains 'anonymous_namespace{'."""
        name = self.get_name()
        return 'anonymous_namespace{' in name

    def show(self):
        """Print the namespace properties, nested compounds and members."""
        self.show_base()
        doc_text = self._doclocation.get_full_string()
        print('Doc. loc.: {0}'.format(doc_text))
        _show_list('Inner namespaces', self._innernamespaces)
        _show_list('Classes', self._classes)
        self.show_members()
959
class Class(Compound):

    """Compound that represents a class-like entity."""

    def __init__(self, name, refid):
        Compound.__init__(self, name, refid)
        # Filled in from the <location> element by _load_element().
        self._location = None
        # Set through set_namespace() and set_outer_class().
        self._namespace = None
        self._outerclass = None
        # Files that reported this class through add_file().
        self._files = set()
        # Inheritance relations read by _load_element().
        self._baseclasses = []
        self._derivedclasses = set()
        # Nested classes collected by _load_inner_class().
        self._innerclasses = set()

    def _load_inner_class(self, compound):
        """Register a nested class and mark this class as its outer class."""
        compound.set_outer_class(self)
        self._innerclasses.add(compound)

    def _load_element(self, elem):
        """Load base/derived class references and the location.

        Returns True if the element was one of <basecompoundref>,
        <derivedcompoundref> or <location>, False otherwise.
        """
        tag = elem.tag
        if tag == 'basecompoundref':
            # TODO: Handle unknown bases?
            # Bases without a refid are skipped, but still count as handled.
            if 'refid' in elem.attrib:
                # TODO: Handle prot and virt attributes, check name?
                base_id = elem.attrib['refid']
                self._baseclasses.append(self._docset.get_compound(base_id))
            return True
        if tag == 'derivedcompoundref':
            # TODO: Handle prot and virt attributes, check name?
            derived_id = elem.attrib['refid']
            self._derivedclasses.add(self._docset.get_compound(derived_id))
            return True
        if tag == 'location':
            self._location = LocationWithBody(elem)
            return True
        return False

    def add_file(self, compound):
        """Record a file that contains this class."""
        self._files.add(compound)

    def set_namespace(self, compound):
        """Set the namespace that contains this class."""
        self._namespace = compound

    def set_outer_class(self, compound):
        """Set the class that this class is nested in."""
        self._outerclass = compound

    def get_reporter_location(self):
        """Return the reporter location of the class location.

        Requires that a <location> element has been loaded.
        """
        location = self._location
        return location.get_reporter_location()

    def get_files(self):
        """Return the set of files recorded with add_file()."""
        return self._files

    def is_local(self):
        """Return True if the class appears in source files only.

        The class is not local if it is recorded for more than one file, or
        if its file is not a source file.  A class without any recorded
        files is considered local.
        """
        files = self._files
        if len(files) > 1:
            return False
        return all(fileobj.is_source_file() for fileobj in files)

    def show(self):
        """Print the class properties, inner classes and members."""
        self.show_base()
        print('Namespace:  {0}'.format(self._namespace))
        outer = self._outerclass
        if outer:
            print('Outer cls:  {0}'.format(outer))
        loc = self._location
        decl_text = loc.get_location().get_full_string()
        print('Location:   {0}'.format(decl_text))
        body_text = loc.get_body_location().get_full_string()
        print('Body loc:   {0}'.format(body_text))
        _show_list('Inner classes', self._innerclasses)
        self.show_members()
1028
1029 #####################################################################
1030 # Top-level container class
1031
def _get_compound_type_from_kind(kind):
    """Return the internal class for a Doxygen XML compound kind.

    Classes, structs and unions all map to Class.  None is returned for
    kinds that have no corresponding internal class.
    """
    if kind == 'file':
        return File
    if kind == 'dir':
        return Directory
    if kind == 'group':
        return Group
    if kind == 'namespace':
        return Namespace
    if kind in ('class', 'struct', 'union'):
        return Class
    return None
1046
def _get_member_type_from_kind(kind):
    """Return the internal class for a Doxygen XML member kind.

    None is returned for kinds that have no corresponding internal class.
    """
    if kind == 'define':
        return Define
    if kind == 'variable':
        return Variable
    if kind == 'typedef':
        return Typedef
    if kind == 'enum':
        return Enum
    if kind == 'enumvalue':
        return EnumValue
    if kind == 'function':
        return Function
    if kind == 'friend':
        return FriendDeclaration
    return None
1065
class DocumentationSet(object):

    """Root object for Doxygen XML documentation tree.

    On initialization, it reads the index.xml file from the Doxygen XML output,
    which contains the list of entities.  Only the ID and name for the entities,
    and the parent compounds for members, are available from this file.

    load_details() can be called to load the detailed compound XML files.
    This constructs relations between compound entities, and initializes other
    attributes for the entities.

    load_file_details() does the same as load_details(), except that it leaves
    those compound XML files unloaded that do not affect file objects or their
    parent hierarchy.  This saves some time if details for actual code
    constructs like namespaces, classes or members are not necessary.

    merge_duplicates() can then be called to remove members with different IDs,
    but that actually reference the same code entity.  For some reason, Doxygen
    seems to produce these in certain cases.
    """

    def __init__(self, xmlroot, reporter):
        """Initialize the documentation set and read index data.

        xmlroot is the directory that contains index.xml, and reporter is the
        object used for reporting issues found in the XML.
        """
        self._xmlroot = xmlroot
        self._reporter = reporter
        xmlpath = os.path.join(xmlroot, 'index.xml')
        indextree = ET.parse(xmlpath)
        self._compounds = dict()
        self._members = dict()
        self._files = dict()
        for compoundelem in indextree.getroot():
            name = compoundelem.find('name').text
            refid = compoundelem.attrib['refid']
            kind = compoundelem.attrib['kind']
            if kind in ('page', 'example'):
                # TODO: Model these types as well
                continue
            compoundtype = _get_compound_type_from_kind(kind)
            if compoundtype is None:
                reporter.xml_assert(xmlpath,
                        "unknown compound kind '{0}'".format(kind))
                continue
            compound = compoundtype(name, refid)
            compound.set_documentation_set(self)
            self._compounds[refid] = compound
            for memberelem in compoundelem.iter('member'):
                membername = memberelem.find('name').text
                memberid = memberelem.attrib['refid']
                memberkind = memberelem.attrib['kind']
                # Resolve the type first: isinstance() below would raise a
                # TypeError if it was given None for an unknown kind.
                membertype = _get_member_type_from_kind(memberkind)
                if membertype is None:
                    reporter.xml_assert(xmlpath,
                            "unknown member kind '{0}'".format(memberkind))
                    continue
                if memberid in self._members:
                    # The same member can be listed under several compounds.
                    member = self._members[memberid]
                    if not isinstance(member, membertype):
                        reporter.xml_assert(xmlpath,
                                "id '{0}' used for multiple kinds of members"
                                .format(memberid))
                        continue
                else:
                    member = membertype(membername, memberid)
                    member.set_documentation_set(self)
                    self._members[memberid] = member
                member.add_parent_compound(compound)
                compound.add_member(member)

    def load_file_details(self, filelist=None):
        """Load detailed XML files for all files and possible parents of files.

        If filelist is set, it should be a list of file paths, and details will
        be loaded only for files in those paths.  The paths should be relative
        to the root of the Gromacs source tree."""
        for compound in self._compounds.values():
            if isinstance(compound, (Directory, Group)):
                compound.load_details()
            elif not filelist and isinstance(compound, File):
                compound.load_details()
                self._files[compound.get_path()] = compound
        if filelist:
            # We can't access the full path from the File object before the
            # details are loaded, because Doxygen does not write that into
            # index.xml.  But we can use the Directory objects (where the name
            # is the relative path) to get the path.
            for compound in self._compounds.values():
                if isinstance(compound, File):
                    dirobj = compound.get_directory()
                    if not dirobj:
                        continue
                    relpath = os.path.join(dirobj.get_name(),
                                           compound.get_name())
                    if relpath in filelist:
                        compound.load_details()
                        self._files[compound.get_path()] = compound

    def load_details(self):
        """Load detailed XML files for each compound."""
        for compound in self._compounds.values():
            compound.load_details()
            if isinstance(compound, File):
                self._files[compound.get_path()] = compound
        # TODO: Add links to files using location

    def merge_duplicates(self):
        """Merge duplicate member definitions based on body location.

        At least for some functions that are declared in a header, but have
        their body in a source file, Doxygen seems to create two different IDs,
        but the contents of the members are the same, except for the location
        attribute.  This method merges members that have identical name and
        body location into a single member that keeps the information from both
        instances (they should only differ in the location attribute and in
        parent compounds).  Both IDs point to the merged member after this
        method.

        Groups that cannot be merged unambiguously (several declarations, or
        only duplicate definitions) are reported through the reporter and
        left untouched.
        """
        members_by_body = dict()
        for member in self._members.values():
            bodyloc = member.get_body_location()
            if bodyloc:
                index = (bodyloc, type(member), member.get_name())
                members_by_body.setdefault(index, []).append(member)
        for memberlist in members_by_body.values():
            if len(memberlist) < 2:
                continue
            declaration = None
            otherdeclarations = []
            definition = None
            for member in memberlist:
                if member.has_same_body_location():
                    if definition is not None:
                        self._reporter.xml_assert(None,
                                "duplicate definition for a member '{0}'"
                                .format(definition))
                        continue
                    definition = member
                elif declaration is None:
                    declaration = member
                else:
                    otherdeclarations.append(member)
            if otherdeclarations:
                # TODO: gmx_cpuid.c produces some false positives
                details = []
                for otherdeclaration in otherdeclarations:
                    details.append('{0}: another declaration is here'
                            .format(otherdeclaration.get_reporter_location()))
                details.append('{0}: definition is here'
                        .format(declaration.get_body_location()))
                text = "duplicate declarations for a member '{0}'".format(declaration)
                self._reporter.code_issue(declaration, text, details)
                continue
            if declaration is None or definition is None:
                # Without a declaration/definition pair there is nothing to
                # merge.  This happens when all entries are duplicate
                # definitions, which were already reported above; merging
                # anyway would store None in self._members and then fail.
                continue
            self._members[definition.get_id()] = declaration
            declaration.merge_definition(definition)
            for compound in definition.get_parent_compounds():
                compound.replace_member(definition, declaration)

    def get_reporter(self):
        """Return reporter object to use for reporting issues.

        This method is used in the entity classes to access the reporter when
        they are parsing the XML files.
        """
        return self._reporter

    def get_xmlroot(self):
        """Return root of the Doxygen XML directory."""
        return self._xmlroot

    def get_compound(self, refid):
        """Return the compound with the given ID (KeyError if unknown)."""
        return self._compounds[refid]

    def get_member(self, refid):
        """Return the member with the given ID (KeyError if unknown)."""
        return self._members[refid]

    def get_compounds(self, types, predicate=None):
        """Return a list of compounds of the given type(s).

        types is passed to isinstance().  If predicate is given, only
        compounds for which it returns a true value are included.
        """
        result = []
        for compound in self._compounds.values():
            if isinstance(compound, types) and \
                    (predicate is None or predicate(compound)):
                result.append(compound)
        return result

    def get_members(self, types=None, predicate=None):
        """Return a list of unique members, optionally filtered.

        If types is given, it is passed to isinstance().  If predicate is
        given, only members for which it returns a true value are included.
        """
        # self._members can contain duplicates because of merge_duplicates()
        result = set()
        for member in self._members.values():
            if (types is None or isinstance(member, types)) and \
                    (predicate is None or predicate(member)):
                result.add(member)
        return list(result)

    def get_files(self, paths=None):
        """Return file compounds, optionally filtered by name suffix.

        If paths is set, it is passed to str.endswith() and should thus be
        a string or a tuple of strings.
        """
        if paths:
            return self.get_compounds(File, lambda x: x.get_name().endswith(paths))
        else:
            return self.get_compounds(File)

    def get_directories(self, paths):
        """Return directory compounds whose name ends with paths.

        paths is passed to str.endswith() and should thus be a string or
        a tuple of strings.
        """
        return self.get_compounds(Directory, lambda x: x.get_name().endswith(paths))

    def get_groups(self, name):
        """Return group compounds whose name is contained in name.

        The check is 'group name in name', so name should be a collection
        of group names.
        """
        return self.get_compounds(Group, lambda x: x.get_name() in name)

    def get_namespaces(self, name=None):
        """Return namespace compounds, optionally only those listed in name.

        The check is 'namespace name in name', so name should be a
        collection of namespace names.
        """
        if name:
            return self.get_compounds(Namespace, lambda x: x.get_name() in name)
        else:
            return self.get_compounds(Namespace)

    def get_classes(self, name=None):
        """Return class compounds, optionally only those listed in name.

        The check is 'class name in name', so name should be a collection
        of class names.
        """
        if name:
            return self.get_compounds(Class, lambda x: x.get_name() in name)
        else:
            return self.get_compounds(Class)

    def get_functions(self, name):
        """Return members whose name is contained in name.

        Note that the filter uses the Member type, so members other than
        functions are returned as well if their name matches.
        """
        return self.get_members(Member, lambda x: x.get_name() in name)
1286
1287 #####################################################################
1288 # Code for running in script mode
1289
def main():
    """Run the script for debugging/Doxygen XML output inspection.

    Loads the Doxygen XML output from the directory given with -R, merges
    duplicate members, and prints the entities selected with the --show-*
    options.  Exits with a usage error if the root directory is not given.
    """
    import sys

    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option('-R', '--root-dir',
                      help='Doxygen XML root directory')
    parser.add_option('-F', '--show-file', action='append',
                      help='Show contents of given file')
    parser.add_option('-d', '--show-dir', action='append',
                      help='Show contents of given directory')
    parser.add_option('-g', '--show-group', action='append',
                      help='Show contents of given group')
    parser.add_option('-n', '--show-namespace', action='append',
                      help='Show contents of given namespace')
    parser.add_option('-c', '--show-class', action='append',
                      help='Show contents of given class')
    # TODO: Add option for other types, and make them work
    parser.add_option('-f', '--show-function', action='append',
                      help='Show details of given function')
    options, _ = parser.parse_args()
    if not options.root_dir:
        # Without this check, the missing directory only surfaces as a
        # TypeError from os.path.join() inside DocumentationSet.
        parser.error('Doxygen XML root directory must be given with -R')

    # Imported only after the options are validated, so that usage errors
    # are reported before anything else is loaded.
    from reporter import Reporter

    reporter = Reporter()

    sys.stderr.write('Loading index.xml...\n')
    docset = DocumentationSet(options.root_dir, reporter)
    reporter.write_pending()
    sys.stderr.write('Loading details...\n')
    docset.load_details()
    reporter.write_pending()
    sys.stderr.write('Processing...\n')
    docset.merge_duplicates()
    reporter.write_pending()

    # The query methods take tuples: paths are matched with str.endswith()
    # and names with 'in'.
    objlist = []
    if options.show_file:
        objlist.extend(docset.get_files(tuple(options.show_file)))
    if options.show_dir:
        objlist.extend(docset.get_directories(tuple(options.show_dir)))
    if options.show_group:
        objlist.extend(docset.get_groups(tuple(options.show_group)))
    if options.show_namespace:
        # TODO: Replace file names with anonymous_namespace{filename}
        objlist.extend(docset.get_namespaces(tuple(options.show_namespace)))
    if options.show_class:
        objlist.extend(docset.get_classes(tuple(options.show_class)))
    if options.show_function:
        objlist.extend(docset.get_functions(tuple(options.show_function)))
    for obj in objlist:
        obj.show()
1344
# Run the inspection tool only when the file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()