f26ad9cdbdf6102913fd8bed6760aefc60a9f120
[alexxy/gromacs.git] / doxygen / doxygenxml.py
1 #!/usr/bin/python
2 #
3 # This file is part of the GROMACS molecular simulation package.
4 #
5 # Copyright (c) 2014, by the GROMACS development team, led by
6 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 # and including many others, as listed in the AUTHORS file in the
8 # top-level source directory and at http://www.gromacs.org.
9 #
10 # GROMACS is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU Lesser General Public License
12 # as published by the Free Software Foundation; either version 2.1
13 # of the License, or (at your option) any later version.
14 #
15 # GROMACS is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 # Lesser General Public License for more details.
19 #
20 # You should have received a copy of the GNU Lesser General Public
21 # License along with GROMACS; if not, see
22 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24 #
25 # If you want to redistribute modifications to GROMACS, please
26 # consider that scientific software is very special. Version
27 # control is crucial - bugs must be traceable. We will be happy to
28 # consider code for inclusion in the official distribution, but
29 # derived work must not be called official GROMACS. Details are found
30 # in the README & COPYING files - if they are missing, get the
31 # official version at http://www.gromacs.org.
32 #
33 # To help us fund GROMACS development, we humbly ask that you cite
34 # the research papers on the package. Check out http://www.gromacs.org.
35
36 """Doxygen XML output parser.
37
38 This module implements a parser for the Doxygen XML output, converting it into
39 an object model that can be used to navigate the documentation.  It also uses
40 knowledge from how Doxygen works to provide access to things like visibility of
41 individual member documentation (e.g., based on what is the visibility of its
42 parent compound objects).
43
44 The object model is rooted at a DocumentationSet object.  Each documented
45 entity is modeled as an Entity, and this has subclasses Member and Compound to
46 correspond to the two categories of items that Doxygen handles.  These classes
47 are further subclassed to match each kind of entity that Doxygen produces.
48 Only kinds produced by Doxygen from C/C++ code are modeled.  Everything else
49 is ignored after a warning.
50
51 Currently the member entities are not completely parsed from the XML files, and
52 the interface may need additional work to provide convenient access to all
53 member types and their common properties.  For now, focus is in modeling the
54 compound entities.
55
56 The implementation is mostly independent of any GROMACS-specific rules, except
57 for the following:
58  - DocType.library is a GROMACS-specific construct that is deduced from the
59    contents of the detailed description (presence of a \libinternal command in
60    the Doxygen comment triggers it).
61  - DocType.internal is deduced from the presence of a \internal command that
62    covers the whole detailed description.
63  - List of extensions for determining whether a file is a source file only
64    contains extensions actually used by GROMACS.
65 It would be possible to move these out from this file, but that would require
66 exposing the XML representation for the descriptions, which is not nice either.
67
68 The module can also be run as a script that can dump out different parts of the
69 object model.  This can be used to debug the parser, as well as check what is
70 actually in the XML documentation.
71 """
72
73 import os.path
74 import xml.etree.ElementTree as ET
75
76 import reporter
77
78 #####################################################################
79 # Helper functions and classes
80
81 def _show_list(title, objlist):
82     """Helper function for formatting a list of objects for debug output."""
83     if objlist:
84         print '{0}:'.format(title)
85         for obj in objlist:
86             print '  ', obj
87
class DocType(object):

    """Documentation visibility in the generated documentation.

    Instances wrap an integer where a larger value means wider visibility
    (undocumented < internal < library < public).  Instances can be compared
    with each other and hashed; equal values compare equal.
    """

    # Mapping to string representations for the internal integer values
    _names = ['undocumented', 'internal', 'library', 'public']

    def __init__(self, value):
        """Initialize a DocType instance.

        DocType.{none,internal,library,public} should be used outside the class
        instead of calling the constructor.
        """
        self._value = value

    def __str__(self):
        """Return string representation for the documentation type."""
        return self._names[self._value]

    def __cmp__(self, other):
        """Order documentation types in the order of visibility.

        Kept for Python 2 callers; written without the cmp() builtin, which
        does not exist in Python 3.
        """
        return (self._value > other._value) - (self._value < other._value)

    # Rich comparisons: Python 3 ignores __cmp__, so without these ordering
    # (e.g., max() over visibilities) would fail and equality would fall back
    # to object identity.

    def __eq__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value == other._value

    def __ne__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value != other._value

    def __lt__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value < other._value

    def __le__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value <= other._value

    def __gt__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value > other._value

    def __ge__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value >= other._value

    def __hash__(self):
        """Hash consistently with __eq__ (equal values hash equally)."""
        return hash(self._value)

# Static values for documentation types.
DocType.none = DocType(0)
DocType.internal = DocType(1)
DocType.library = DocType(2)
DocType.public = DocType(3)
116
class Location(object):

    """Location (file, line, column) of a Doxygen entity.

    Parses the 'file', 'line' and 'column' attributes of a <location> tag in
    Doxygen XML.  Used directly where no body location is expected, and as
    one half of a LocationWithBody otherwise.
    """

    def __init__(self, elem):
        """Initialize location from a <location> element.

        Raises KeyError if any of the three attributes is missing.
        """
        attrs = elem.attrib
        self.filepath = attrs['file']
        self.line = int(attrs['line'])
        # The column is stored as the raw attribute string (not converted).
        self.column = attrs['column']

    def __str__(self):
        """Return the location as 'file:line'."""
        return '{path}:{line}'.format(path=self.filepath, line=self.line)

    def get_reporter_location(self):
        """Return a reporter.Location for the file and line."""
        return reporter.Location(self.filepath, self.line)

    def get_full_string(self):
        """Return the location as 'file:line:column'."""
        return '{path}:{line}:{column}'.format(
            path=self.filepath, line=self.line, column=self.column)
140
class BodyLocation(object):

    """Body location of a Doxygen entity.

    This class contains the logic to parse a body location from a <location>
    tag in Doxygen XML.  Not all entities have these attributes.
    This is only used as part of a LocationWithBody, which handles cases where
    the body location is optional.

    The body location can be compared and hashed so that it can be used in
    a dictionary for DocumentationSet.merge_duplicates().  Two body locations
    are equal when file path, start line and end line all match; ordering is
    lexicographic over those three values.
    """

    def __init__(self, elem):
        """Initialize body location from a <location> element.

        Raises KeyError if 'bodyfile', 'bodystart' or 'bodyend' is missing.
        """
        self.filepath = elem.attrib['bodyfile']
        self.startline = int(elem.attrib['bodystart'])
        self.endline = int(elem.attrib['bodyend'])

    def _key(self):
        """Return the tuple that defines identity and ordering."""
        return (self.filepath, self.startline, self.endline)

    def __cmp__(self, other):
        """Three-way comparison, kept for Python 2 callers.

        Written without the cmp() builtin, which Python 3 does not have.
        """
        mine, theirs = self._key(), other._key()
        return (mine > theirs) - (mine < theirs)

    # Rich comparisons: Python 3 ignores __cmp__, so without __eq__ two
    # BodyLocation objects for the same body would never match as
    # dictionary keys.

    def __eq__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() != other._key()

    def __lt__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() < other._key()

    def __le__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() <= other._key()

    def __gt__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() > other._key()

    def __ge__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() >= other._key()

    def __hash__(self):
        """Hash consistently with __eq__."""
        return hash(self._key())

    def __str__(self):
        """Return the body location as 'file:startline'."""
        return '{0}:{1}'.format(self.filepath, self.startline)

    def get_full_string(self):
        """Return 'file:start-end', or 'file:start' if the end line is negative."""
        if self.endline < 0:
            return self.__str__()
        return '{0}:{1}-{2}'.format(self.filepath, self.startline, self.endline)
178
class LocationWithBody(object):

    """Main location of a Doxygen entity plus an optional body location.

    The main location is always parsed from the <location> element; the body
    location is parsed only when the element carries a 'bodyfile' attribute,
    and is None otherwise.
    """

    def __init__(self, elem):
        """Initialize both locations from a <location> element."""
        self._location = Location(elem)
        has_body = 'bodyfile' in elem.attrib
        self._bodylocation = BodyLocation(elem) if has_body else None

    def __str__(self):
        """Return 'main / body', or 'main (no body)' without a body location."""
        if self._bodylocation:
            return '{0} / {1}'.format(self._location, self._bodylocation)
        return '{0} (no body)'.format(self._location)

    def get_reporter_location(self):
        """Return reporter location for this location.

        Issues are always reported at the main location, which should match
        the declaration, where most of the documentation typically is.
        """
        main = self._location
        return main.get_reporter_location()

    def get_location(self):
        """Return the main Location object."""
        return self._location

    def get_body_location(self):
        """Return the BodyLocation object, or None if there is no body."""
        return self._bodylocation

    def has_same_body_location(self):
        """Check whether main location matches body location.

        A differing main location likely points to the declaration of the
        function.  Both the file path and the (start) line must match.
        """
        main, body = self._location, self._bodylocation
        if main.filepath != body.filepath:
            return False
        return main.line == body.startline
223
class MemberSection(object):

    """Ordered group of members under one section kind of a compound."""

    def __init__(self, kind):
        """Create an empty section of the given kind."""
        self._members = []
        self._kind = kind

    def __str__(self):
        """Return the section kind."""
        return self._kind

    def add_member(self, member):
        """Append a member to the end of the section."""
        self._members.append(member)

    def replace_member(self, old, new):
        """Replace the first occurrence of old with new.

        Does nothing if old is not in this section.
        """
        members = self._members
        if old not in members:
            return
        members[members.index(old)] = new
244
245 #####################################################################
246 # Documentation entities
247
class Entity(object):

    """Doxygen documentation entity.

    This class represents common properties of an entity that can contain
    Doxygen documentation: a name, a Doxygen reference ID, flags for which
    descriptions are present, and the resulting visibility (a DocType).
    """

    def __init__(self, name, refid):
        """Initialize an entity from its name and Doxygen reference ID.

        The entity starts without any descriptions and with visibility
        DocType.none; these are updated by _process_descriptions().
        """
        self._docset = None
        self._name = name
        self._id = refid
        self._has_brief_description = False
        self._has_detailed_description = False
        self._has_inbody_description = False
        self._visibility = DocType.none

    def __str__(self):
        """Return the entity name."""
        return self._name

    def _get_reporter(self):
        """Return reporter to use for parsing issues."""
        return self._docset.get_reporter()

    def set_documentation_set(self, docset):
        """Set the documentation set this entity belongs to.

        The documentation set parent provides access to a common reporter
        object, and also allows the entity to resolve references to other
        entities while loading XML information.  May only be called once.
        """
        assert self._docset is None
        self._docset = docset

    def get_id(self):
        """Return the Doxygen reference ID."""
        return self._id

    def get_name(self):
        """Return the entity name."""
        return self._name

    def get_reporter_location(self):
        """Return a reporter location of the form '<name>' without a line."""
        return reporter.Location('<{0}>'.format(self._name), None)

    def get_visibility(self):
        """Return the DocType deduced from the entity's own documentation."""
        return self._visibility

    def is_documented(self):
        """Return True if the visibility is anything other than DocType.none."""
        return self._visibility != DocType.none

    def has_brief_description(self):
        """Return True if a non-empty brief description was found."""
        return self._has_brief_description

    def has_inbody_description(self):
        """Return True if a non-empty in-body description was found."""
        return self._has_inbody_description

    def _process_descriptions(self, briefelem, detailselem, inbodyelem):
        """Deduce description flags and visibility from description elements.

        Each argument is the corresponding XML description element, or None
        if it was not present.  An element with no child elements counts as
        an empty description.
        """
        reporter = self._get_reporter()
        if briefelem is not None and len(briefelem) > 0:
            self._has_brief_description = True
            self._visibility = DocType.public
        if detailselem is not None and len(detailselem) > 0:
            self._visibility = DocType.public
            # Gromacs-specific:
            # \internal is used at the beginning of a comment block to
            # mark the block internal to the module.
            # \libinternal is used similarly, and inserts custom XML
            # elements.
            if detailselem[0].tag == 'internal':
                if len(detailselem) == 1:
                    self._visibility = DocType.internal
                else:
                    # TODO: Should we also check if internal appears elsewhere?
                    # The backslash is escaped explicitly; the message text
                    # is unchanged ('\i' is not a valid escape sequence).
                    reporter.doc_note(self, '\\internal does not cover whole documentation')
            if detailselem[0].find('libinternal') is not None:
                if self._visibility == DocType.public:
                    self._visibility = DocType.library
                else:
                    reporter.doc_error(self, '\\libinternal should not be used inside \\internal')
            self._has_detailed_description = True
        if inbodyelem is not None:
            self._has_inbody_description = (len(inbodyelem) > 0)

    def show_base(self):
        """Format information for common properties.

        This is called from subclass show() methods to show base information
        about the entity.
        """
        # Single-argument print(...) calls give identical output under both
        # Python 2 and Python 3.
        print('ID:         {0}'.format(self._id))
        print('Name:       {0}'.format(self._name))
        print('Location:   {0}'.format(self.get_reporter_location()))
        doctype = []
        if self._has_brief_description:
            doctype.append('brief')
        if self._has_detailed_description:
            doctype.append('details')
        if self._has_inbody_description:
            doctype.append('in-body')
        if not doctype:
            doctype.append('none')
        print('Doc:        {0}'.format(', '.join(doctype)))
        print('Visibility: {0}'.format(self._visibility))
350
351 # Member entities
352
class Member(Entity):

    """Member entity.

    In Doxygen, a member entity is an entity such as a function or an enum that
    cannot contain other documented entities (an enum is a slight exception, as
    enum values are still nested within the enum member).  A member always
    belongs to one (or more) compounds, which means that the detailed
    documentation for the member appears on the documentation page for that
    compound.  If none of the parent compounds are documented, the member
    doesn't appear anywhere, even if it is documented.

    Member information is loaded from a parent compound's XML file.  If there
    is more than one parent, the first one encountered will be used
    (presumably, Doxygen duplicates the information into each XML file).
    """

    def __init__(self, name, refid):
        """Initialize a member with no parents, location or loaded details."""
        Entity.__init__(self, name, refid)
        self._parents = set()
        self._location = None
        self._alternates = set()
        self._loaded = False
        # TODO: Move to Entity?
        self._xmlpath = None

    def add_parent_compound(self, compound):
        """Add a compound that contains this member."""
        self._parents.add(compound)

    def _get_raw_location(self):
        """Return the LocationWithBody object associated with this member.

        This is None until a <location> element has been loaded.
        The indirection is necessary so that EnumValue can override it to
        report a non-empty location: Doxygen doesn't provide any location
        for <enumvalue>.
        """
        return self._location

    def get_parent_compounds(self):
        """Return the set of compounds that contain this member."""
        return self._parents

    def get_inherited_visibility(self):
        """Return the widest visibility among the parent compounds.

        Raises ValueError if the member has no parent compounds.
        """
        return max([parent.get_visibility() for parent in self._parents])

    def is_visible(self):
        """Return True if at least one parent compound is documented."""
        return self.get_inherited_visibility() != DocType.none

    def has_same_body_location(self):
        """Return whether the main location matches the body location."""
        return self._get_raw_location().has_same_body_location()

    def get_reporter_location(self):
        """Return the reporter location for the member's main location."""
        return self._get_raw_location().get_reporter_location()

    def get_location(self):
        """Return the main Location of the member."""
        return self._get_raw_location().get_location()

    def get_body_location(self):
        """Return the BodyLocation of the member, or None if it has no body."""
        return self._get_raw_location().get_body_location()

    def merge_definition(self, definition):
        """Merge another member into this one.

        The parents of the other member are added to this member's parents,
        and the other member is remembered as an alternate.
        """
        self._parents.update(definition._parents)
        self._alternates.add(definition)

    def load_details_from_element(self, rootelem, xmlpath):
        """Load details for the member from a given XML element.

        This method is called when encountering member definitions while
        processing a compound XML file to load the information for that member.
        It processes common properties for a member, and delegates other
        elements to _load_element().  Only the first call loads anything;
        later calls return immediately.
        """
        if self._loaded:
            # TODO: It would be nice to verify that the same information
            # is present in all instances
            return
        self._xmlpath = xmlpath
        # TODO: Process the attributes
        reporter = self._get_reporter()
        briefelem = None
        detailselem = None
        inbodyelem = None
        for elem in rootelem:
            if elem.tag == 'name':
                if elem.text != self.get_name():
                    reporter.xml_assert(xmlpath,
                            "member name mismatch: '{0}' (in index.xml) vs. '{1}'".format(
                                self.get_name(), elem.text))
            elif elem.tag == 'briefdescription':
                briefelem = elem
            elif elem.tag == 'detaileddescription':
                detailselem = elem
            elif elem.tag == 'inbodydescription':
                # TODO: in-body description is probably only possible for
                # functions; move it there.
                inbodyelem = elem
            elif elem.tag == 'location':
                self._location = LocationWithBody(elem)
            else:
                # TODO: Process the rest of the elements so that an
                # "unknown member child element" assertion can be raised
                # when _load_element() returns False.  For now the return
                # value is intentionally ignored.
                self._load_element(elem)
        self._process_descriptions(briefelem, detailselem, inbodyelem)
        self._loaded = True

    def _load_element(self, element):
        """Load data from a child XML element.

        This method is called for all XML elements under the <memberdef>
        element that are not handled directly by the Member class.
        Derived classes should return True if they process the element.
        """
        return False

    def show(self):
        """Print debug information about the member."""
        self.show_base()
        # Single-argument print(...) calls give identical output under both
        # Python 2 and Python 3.
        print('Parent vis: {0}'.format(self.get_inherited_visibility()))
        print('Location:   {0}'.format(self.get_location().get_full_string()))
        # The body location is optional (see LocationWithBody); avoid
        # calling a method on None for members without a body.
        bodylocation = self.get_body_location()
        if bodylocation is not None:
            print('Body loc:   {0}'.format(bodylocation.get_full_string()))
        else:
            print('Body loc:   (no body)')
        _show_list('Parents', self._parents)
474
class Define(Member):

    """Member kind that adds nothing to Member (presumably Doxygen defines)."""
477
class Variable(Member):

    """Member kind that adds nothing to Member (presumably Doxygen variables)."""
480
class Typedef(Member):

    """Member kind that adds nothing to Member (presumably Doxygen typedefs)."""
483
class Enum(Member):

    """Enum member that also tracks the members for its enum values."""

    def __init__(self, name, refid):
        """Initialize an enum with an empty set of values."""
        super(Enum, self).__init__(name, refid)
        self._values = set()

    def _load_element(self, elem):
        """Handle <enumvalue> children; report other elements as unhandled.

        Returns True for every <enumvalue> element, False for anything else.
        """
        if elem.tag != 'enumvalue':
            return False
        value_id = elem.attrib['id']
        # Doxygen seems to sometimes assign the same ID to a singleton enum
        # value (this already triggers a warning in loading index.xml).
        # Such a value is accepted but not registered.
        if value_id != self.get_id():
            value = self._docset.get_member(value_id)
            value.set_enum(self)
            value.load_details_from_element(elem, self._xmlpath)
            self._values.add(value)
        return True

    def get_values(self):
        """Return the set of enum value members loaded so far."""
        return self._values
505
class EnumValue(Member):

    """Enum value member; takes its location from the owning enum."""

    def __init__(self, name, refid):
        """Initialize an enum value that is not yet attached to an enum."""
        super(EnumValue, self).__init__(name, refid)
        self._enum = None

    def set_enum(self, member):
        """Attach the value to its enum member; may only be done once."""
        assert self._enum is None
        self._enum = member

    def _get_raw_location(self):
        """Return the raw location of the owning enum."""
        owner = self._enum
        return owner._get_raw_location()
517
class Function(Member):

    """Member kind that adds nothing to Member (presumably Doxygen functions)."""
520
class FriendDeclaration(Member):

    """Member kind that adds nothing to Member (presumably friend declarations)."""
523
524 # Compound entities
525
class Compound(Entity):

    """Compound entity.

    In Doxygen, a compound entity is an entity that has its own documentation
    page, and can contain other documented entities (either members, or other
    compounds).  Examples of compounds are files and classes.
    A compound entity always appears in the documentation, even if it is
    contained in another compound that is not documented.

    The list of members for a compound is initialized when the XML index file
    is read.  All other information is loaded from an XML file that is specific
    to the compound.  In addition to describing the compound, this XML file
    contains references to contained compounds, and details of all members
    within the compound.
    """
    def __init__(self, name, refid):
        """Initialize an empty compound whose details are not yet loaded."""
        Entity.__init__(self, name, refid)
        # Members keyed by the ID under which they were added.
        self._members = dict()
        self._children = set()
        self._sections = []
        self._groups = set()
        self._loaded = False

    def _get_xml_path(self):
        """Return path to the details XML file for this compound."""
        return os.path.join(self._docset.get_xmlroot(), self.get_id() + '.xml')

    def add_member(self, member):
        """Add a contained member."""
        self._members[member.get_id()] = member

    def add_group(self, compound):
        """Add a group (a compound entity) that contains this entity."""
        self._groups.add(compound)

    def replace_member(self, old, new):
        """Replace a contained member with another one.

        The new member is stored under the ID of the old member, and is also
        substituted in every member section.

        Raises ValueError if old is not a member of this compound, or if a
        member with the ID of new is already present.
        """
        if old.get_id() not in self._members:
            raise ValueError("Trying to replace a non-existent member")
        elif new.get_id() in self._members:
            raise ValueError("Trying to replace with an existing member")
        self._members[old.get_id()] = new
        for section in self._sections:
            section.replace_member(old, new)

    def load_details(self):
        """Load details for the compound from its details XML file.

        This method processes common properties for a compound.
        References to inner compounds are delegated to _load_inner_*() methods,
        and all members encountered in the XML file are loaded with
        Member.load_details_from_element().
        Other elements are delegated to _load_element().
        Only the first successful call loads anything; later calls return
        immediately.
        """
        if self._loaded:
            return
        reporter = self._get_reporter()
        xmlpath = self._get_xml_path()
        compoundtree = ET.parse(xmlpath)
        root = compoundtree.getroot()
        if len(root) > 1:
            reporter.xml_assert(xmlpath, "more than one compound in a file")
        # An empty root is reported the same way as a wrong first tag,
        # instead of failing with an IndexError on root[0].
        if len(root) == 0 or root[0].tag != 'compounddef':
            reporter.xml_assert(xmlpath, "expected <compounddef> as the first tag")
            return
        # Handlers for <innerXXX> references, keyed by the XXX suffix.
        inner_handlers = {
            'file': self._load_inner_file,
            'dir': self._load_inner_dir,
            'group': self._load_inner_group,
            'namespace': self._load_inner_namespace,
            'class': self._load_inner_class,
        }
        briefelem = None
        detailselem = None
        # Members known from the index that have not yet been seen in any
        # section of this file.
        missing_members = set(self._members.values())
        for elem in root[0]:
            if elem.tag == 'compoundname':
                if elem.text != self.get_name():
                    reporter.xml_assert(xmlpath,
                            "compound name mismatch: '{0}' (in index.xml) vs. '{1}'"
                            .format(self.get_name(), elem.text))
            elif elem.tag == 'briefdescription':
                briefelem = elem
            elif elem.tag == 'detaileddescription':
                detailselem = elem
            elif elem.tag in ('includes', 'includedby', 'incdepgraph',
                    'invincdepgraph', 'inheritancegraph', 'collaborationgraph',
                    'programlisting', 'templateparamlist', 'listofallmembers'):
                # Known elements that are intentionally not processed.
                pass
            elif elem.tag.startswith('inner'):
                refid = elem.attrib['refid']
                # Strip the 'inner' prefix to get the referenced type.
                reftype = elem.tag[5:]
                # TODO: Handle 'prot' attribute?
                refcompound = self._docset.get_compound(refid)
                self._children.add(refcompound)
                handler = inner_handlers.get(reftype)
                if handler is None:
                    reporter.xml_assert(xmlpath,
                            "unknown inner compound type '{0}'".format(reftype))
                else:
                    handler(refcompound)
            elif elem.tag == 'sectiondef':
                # TODO: Handle header and description elements
                kind = elem.attrib['kind']
                section = MemberSection(kind)
                self._sections.append(section)
                for memberelem in elem.iter('memberdef'):
                    refid = memberelem.attrib['id']
                    member = self._members[refid]
                    member.load_details_from_element(memberelem, xmlpath)
                    section.add_member(member)
                    missing_members.discard(member)
                    # Enum values need special handling, but are not worth
                    # extra generalization.
                    if isinstance(member, Enum):
                        missing_members.difference_update(member.get_values())
            else:
                if not self._load_element(elem):
                    reporter.xml_assert(xmlpath,
                            "unknown compound child element '{0}'".format(elem.tag))
        if missing_members:
            reporter.xml_assert(xmlpath, 'members without section')
        self._process_descriptions(briefelem, detailselem, None)
        self._loaded = True

    def _unexpected_inner_compound(self, typename, compound):
        """Report a parsing error for an unexpected inner compound reference."""
        reporter = self._get_reporter()
        xmlpath = self._get_xml_path()
        reporter.xml_assert(xmlpath,
                "unexpected inner {0}: {1}".format(typename, compound))

    def _load_inner_file(self, compound):
        """Process a reference to an inner file.

        Derived classes should override the method if the compound type can
        contain files as nested compounds.
        """
        self._unexpected_inner_compound("file", compound)

    def _load_inner_dir(self, compound):
        """Process a reference to an inner directory.

        Derived classes should override the method if the compound type can
        contain directories as nested compounds.
        """
        self._unexpected_inner_compound("dir", compound)

    def _load_inner_group(self, compound):
        """Process a reference to an inner group.

        Derived classes should override the method if the compound type can
        contain groups as nested compounds.
        """
        self._unexpected_inner_compound("group", compound)

    def _load_inner_namespace(self, compound):
        """Process a reference to an inner namespace.

        Derived classes should override the method if the compound type can
        contain namespaces as nested compounds.
        """
        self._unexpected_inner_compound("namespace", compound)

    def _load_inner_class(self, compound):
        """Process a reference to an inner class.

        Derived classes should override the method if the compound type can
        contain classes as nested compounds.
        """
        self._unexpected_inner_compound("class", compound)

    def _load_element(self, element):
        """Load data from a child XML element.

        This method is called for all XML elements under the <compounddef>
        element that are not handled directly by the Compound class.
        Derived classes should return True if they process the element.
        """
        return False

    def get_groups(self):
        """Return the set of groups that contain this compound."""
        return self._groups

    def show_base(self):
        """Format information for common properties.

        This extends Entity.show_base() by adding properties that are common to
        all compounds.
        """
        Entity.show_base(self)
        if self._groups:
            # Single-argument print(...) gives identical output under both
            # Python 2 and Python 3.
            print('Groups:   {0}'.format(', '.join(map(str, self._groups))))

    def show_members(self):
        """Show list of members.

        This method is provided for use in show() methods of derived classes
        to print the list of members.
        """
        for section in self._sections:
            print('Member section: {0}'.format(section))
            for member in section._members:
                # Three spaces reproduce the output of "print '  ', member".
                print('   {0}'.format(member))
731
class File(Compound):

    """File compound.

    Tracks the file path, the containing directory, and the classes and
    namespaces that the file's XML references as inner compounds.
    """

    def __init__(self, name, refid):
        """Initialize a file whose path and directory are not yet known."""
        Compound.__init__(self, name, refid)
        self._path = None
        self._directory = None
        self._classes = set()
        self._namespaces = set()
        # None until a <location> element has been processed.
        self._is_source_file = None

    def _load_inner_class(self, compound):
        """Register an inner class and tell it that this file contains it."""
        compound.add_file(self)
        self._classes.add(compound)

    def _load_inner_namespace(self, compound):
        """Register an inner namespace and tell it that this file contains it."""
        compound.add_file(self)
        self._namespaces.add(compound)

    def _load_element(self, elem):
        """Handle the <location> element; other elements are not processed.

        Sets the file path and whether the file is a source file, which is
        decided from the file extension.
        """
        if elem.tag == 'location':
            self._path = elem.attrib['file']
            extension = os.path.splitext(self._path)[1]
            self._is_source_file = (extension in ('.c', '.cpp', '.cu'))
            return True
        return False

    def set_directory(self, directory):
        """Set the directory compound that contains this file."""
        self._directory = directory

    def get_reporter_location(self):
        """Return a reporter location for the file path, without a line."""
        return reporter.Location(self._path, None)

    def get_path(self):
        """Return the file path, or None if no location has been loaded."""
        return self._path

    def get_directory(self):
        """Return the containing directory compound, or None if not set."""
        return self._directory

    def is_source_file(self):
        """Return whether the extension is .c, .cpp or .cu (None if unknown)."""
        return self._is_source_file

    def show(self):
        """Print debug information about the file."""
        self.show_base()
        # Single-argument print(...) calls give identical output under both
        # Python 2 and Python 3.
        print('Path:      {0}'.format(self._path))
        print('Directory: {0}'.format(self._directory))
        print('Source:    {0}'.format(self._is_source_file))
        _show_list('Namespaces', self._namespaces)
        _show_list('Classes', self._classes)
        self.show_members()
779         self.show_members()
780
class Directory(Compound):

    """Directory compound.

    Tracks the directory path, the parent directory, and the subdirectories
    and files that the directory's XML references as inner compounds.
    """

    def __init__(self, name, refid):
        """Initialize a directory with no path, parent or contents."""
        Compound.__init__(self, name, refid)
        self._path = None
        self._parent = None
        self._subdirs = set()
        self._files = set()

    def _load_inner_file(self, compound):
        """Register an inner file and set this directory as its directory."""
        compound.set_directory(self)
        self._files.add(compound)

    def _load_inner_dir(self, compound):
        """Register a subdirectory and set this directory as its parent."""
        compound._parent = self
        self._subdirs.add(compound)

    def _load_element(self, elem):
        """Handle the <location> element; other elements are not processed."""
        if elem.tag == 'location':
            self._path = elem.attrib['file']
            return True
        return False

    def get_reporter_location(self):
        """Return a reporter location for the directory path, without a line."""
        return reporter.Location(self._path, None)

    def get_path(self):
        """Return the directory path, or None if no location has been loaded."""
        return self._path

    def get_parent(self):
        """Return the parent directory compound, or None if not set."""
        return self._parent

    def get_subdirectories(self):
        """Return the set of subdirectory compounds."""
        return self._subdirs

    def show(self):
        """Print debug information about the directory."""
        self.show_base()
        # Single-argument print(...) calls give identical output under both
        # Python 2 and Python 3.
        print('Path:      {0}'.format(self._path))
        if self._parent:
            print('Parent:    {0}'.format(self._parent))
        _show_list('Subdirectories', self._subdirs)
        _show_list('Files', self._files)
822
823 class Group(Compound):
824     def __init__(self, name, refid):
825         Compound.__init__(self, name, refid)
826         self._title = None
827         self._files = set()
828         self._nestedgroups = set()
829         self._namespaces = set()
830         self._classes = set()
831
832     def _load_inner_file(self, compound):
833         compound.add_group(self)
834         self._files.add(compound)
835
836     # Doxygen 1.8.5 doesn't seem to put the directories into the XML output,
837     # even though they are in the HTML output as group members...
838
839     def _load_inner_group(self, compound):
840         compound.add_group(self)
841         self._nestedgroups.add(compound)
842
843     def _load_inner_namespace(self, compound):
844         compound.add_group(self)
845         self._namespaces.add(compound)
846
847     def _load_inner_class(self, compound):
848         compound.add_group(self)
849         self._classes.add(compound)
850
851     def _load_element(self, elem):
852         if elem.tag == 'title':
853             self._title = elem.text
854             return True
855         return False
856
857     def show(self):
858         self.show_base()
859         print 'Title:     {0}'.format(self._title)
860         print 'Inner compounds:'
861         for compound in self._children:
862             print '  ', compound
863         self.show_members()
864
865 class Namespace(Compound):
866     def __init__(self, name, refid):
867         Compound.__init__(self, name, refid)
868         self._doclocation = None
869         self._files = set()
870         self._parent = None
871         self._innernamespaces = set()
872         self._classes = set()
873
874     def _load_inner_namespace(self, compound):
875         compound._parent = self
876         self._innernamespaces.add(compound)
877
878     def _load_inner_class(self, compound):
879         compound.set_namespace(self)
880         self._classes.add(compound)
881
882     def _load_element(self, elem):
883         if elem.tag == 'location':
884             self._doclocation = Location(elem)
885             return True
886         return False
887
888     def add_file(self, compound):
889         self._files.add(compound)
890
891     def get_reporter_location(self):
892         return self._doclocation.get_reporter_location()
893
894     def show(self):
895         self.show_base()
896         print 'Doc. loc.: {0}'.format(self._doclocation.get_full_string())
897         _show_list('Inner namespaces', self._innernamespaces)
898         _show_list('Classes', self._classes)
899         self.show_members()
900
901 class Class(Compound):
902     def __init__(self, name, refid):
903         Compound.__init__(self, name, refid)
904         self._location = None
905         self._namespace = None
906         self._files = set()
907         self._baseclasses = []
908         self._derivedclasses = set()
909         self._outerclass = None
910         self._innerclasses = set()
911
912     def _load_inner_class(self, compound):
913         compound.set_outer_class(self)
914         self._innerclasses.add(compound)
915
916     def _load_element(self, elem):
917         if elem.tag == 'basecompoundref':
918             # TODO: Handle unknown bases?
919             if 'refid' in elem.attrib:
920                 refid = elem.attrib['refid']
921                 # TODO: Handle prot and virt attributes, check name?
922                 base = self._docset.get_compound(refid)
923                 self._baseclasses.append(base)
924             return True
925         if elem.tag == 'derivedcompoundref':
926             refid = elem.attrib['refid']
927             # TODO: Handle prot and virt attributes, check name?
928             derived = self._docset.get_compound(refid)
929             self._derivedclasses.add(derived)
930             return True
931         elif elem.tag == 'location':
932             self._location = LocationWithBody(elem)
933             return True
934         return False
935
936     def add_file(self, compound):
937         self._files.add(compound)
938
939     def set_namespace(self, compound):
940         self._namespace = compound
941
942     def set_outer_class(self, compound):
943         self._outerclass = compound
944
945     def get_reporter_location(self):
946         return self._location.get_reporter_location()
947
948     def get_files(self):
949         return self._files
950
951     def is_local(self):
952         if len(self._files) > 1:
953             return False
954         for fileobj in self._files:
955             if not fileobj.is_source_file():
956                 return False
957         return True
958
959     def show(self):
960         self.show_base()
961         print 'Namespace:  {0}'.format(self._namespace)
962         if self._outerclass:
963             print 'Outer cls:  {0}'.format(self._outerclass)
964         location = self._location
965         print 'Location:   {0}'.format(location.get_location().get_full_string())
966         print 'Body loc:   {0}'.format(location.get_body_location().get_full_string())
967         _show_list('Inner classes', self._innerclasses)
968         self.show_members()
969
970 #####################################################################
971 # Top-level container class
972
973 def _get_compound_type_from_kind(kind):
974     """Map compound kinds from Doxygen XML to internal class types."""
975     if kind == 'file':
976         return File
977     elif kind == 'dir':
978         return Directory
979     elif kind == 'group':
980         return Group
981     elif kind == 'namespace':
982         return Namespace
983     elif kind in ('class', 'struct', 'union'):
984         return Class
985     else:
986         return None
987
988 def _get_member_type_from_kind(kind):
989     """Map member kinds from Doxygen XML to internal class types."""
990     if kind == 'define':
991         return Define
992     elif kind == 'variable':
993         return Variable
994     elif kind == 'typedef':
995         return Typedef
996     elif kind == 'enum':
997         return Enum
998     elif kind == 'enumvalue':
999         return EnumValue
1000     elif kind == 'function':
1001         return Function
1002     elif kind == 'friend':
1003         return FriendDeclaration
1004     else:
1005         return None
1006
class DocumentationSet(object):

    """Root object for Doxygen XML documentation tree.

    On initialization, it reads the index.xml file from the Doxygen XML output,
    which contains the list of entities.  Only the ID and name for the entities,
    and the parent compounds for members, are available from this file.

    load_details() can be called to load the detailed compound XML files.
    This constructs relations between compound entities, and initializes other
    attributes for the entities.

    load_file_details() does the same as load_details(), except that it leaves
    those compound XML files unloaded that do not affect file objects or their
    parent hierarchy.  This saves some time if details for actual code
    constructs like namespaces, classes or members are not necessary.

    merge_duplicates() can then be called to remove members with different IDs,
    but that actually reference the same code entity.  For some reason, Doxygen
    seems to produce these in certain cases.
    """

    def __init__(self, xmlroot, reporter):
        """Initialize the documentation set and read index data.

        xmlroot is the directory that contains index.xml, and reporter is
        the object used to report issues found while parsing.

        Compounds of kind 'page' and 'example' are skipped.  Unknown
        compound or member kinds, and member IDs that are reused for a
        different kind of member, are reported through the reporter and
        skipped.
        """
        self._xmlroot = xmlroot
        self._reporter = reporter
        xmlpath = os.path.join(xmlroot, 'index.xml')
        indextree = ET.parse(xmlpath)
        # Compounds and members are indexed by their Doxygen refid.
        self._compounds = dict()
        self._members = dict()
        # Files indexed by path; filled in by load_details() and
        # load_file_details().
        self._files = dict()
        for compoundelem in indextree.getroot():
            name = compoundelem.find('name').text
            refid = compoundelem.attrib['refid']
            kind = compoundelem.attrib['kind']
            if kind in ('page', 'example'):
                # TODO: Model these types as well
                continue
            compoundtype = _get_compound_type_from_kind(kind)
            if compoundtype is None:
                reporter.xml_assert(xmlpath,
                        "unknown compound kind '{0}'".format(kind))
                continue
            compound = compoundtype(name, refid)
            compound.set_documentation_set(self)
            self._compounds[refid] = compound
            for memberelem in compoundelem.iter('member'):
                membername = memberelem.find('name').text
                memberid = memberelem.attrib['refid']
                memberkind = memberelem.attrib['kind']
                # Resolve the type before looking at existing members, so
                # that an unknown kind is reported instead of being passed
                # as None to isinstance() below.
                membertype = _get_member_type_from_kind(memberkind)
                if membertype is None:
                    reporter.xml_assert(xmlpath,
                            "unknown member kind '{0}'".format(memberkind))
                    continue
                if memberid in self._members:
                    # The same member can be listed under several compounds.
                    member = self._members[memberid]
                    if not isinstance(member, membertype):
                        reporter.xml_assert(xmlpath,
                                "id '{0}' used for multiple kinds of members"
                                .format(memberid))
                        continue
                else:
                    member = membertype(membername, memberid)
                    member.set_documentation_set(self)
                    self._members[memberid] = member
                member.add_parent_compound(compound)
                compound.add_member(member)

    def load_file_details(self):
        """Load detailed XML files for all files and possible parents of files."""
        for compound in self._compounds.values():
            if isinstance(compound, (File, Directory, Group)):
                compound.load_details()
                if isinstance(compound, File):
                    self._files[compound.get_path()] = compound

    def load_details(self):
        """Load detailed XML files for each compound."""
        for compound in self._compounds.values():
            compound.load_details()
            if isinstance(compound, File):
                self._files[compound.get_path()] = compound
        # TODO: Add links to files using location

    def merge_duplicates(self):
        """Merge duplicate member definitions based on body location.

        At least for functions that are declared in a header, but have their
        body in a source file, Doxygen seems to create two different IDs, but
        the contents of the members are the same, except for the location
        attribute.  This method merges members that have identical name and
        body location into a single member that keeps the information from both
        instances (they should only differ in the location attribute and in
        parent compounds).  Both IDs point to the merged member after this
        method.

        Groups with more than one declaration are reported as a code issue
        and left unmerged.  Groups that consist only of definitions are
        reported as duplicate definitions and also left unmerged.
        """
        # Group members by (body location, member type, name); members
        # without a body location are never merged.
        members_by_body = dict()
        for member in self._members.values():
            bodyloc = member.get_body_location()
            if bodyloc:
                index = (bodyloc, type(member), member.get_name())
                members_by_body.setdefault(index, []).append(member)
        for memberlist in members_by_body.values():
            if len(memberlist) <= 1:
                continue
            declaration = None
            otherdeclarations = []
            definition = None
            for member in memberlist:
                if member.has_same_body_location():
                    if definition is not None:
                        self._reporter.xml_assert(None,
                                "duplicate definition for a member '{0}'"
                                .format(definition))
                        continue
                    definition = member
                elif declaration is None:
                    declaration = member
                else:
                    otherdeclarations.append(member)
            if otherdeclarations:
                # TODO: gmx_cpuid.c produces some false positives
                details = []
                for otherdeclaration in otherdeclarations:
                    details.append('{0}: another declaration is here'
                            .format(otherdeclaration.get_reporter_location()))
                details.append('{0}: definition is here'
                        .format(declaration.get_body_location()))
                text = "duplicate declarations for a member '{0}'".format(declaration)
                self._reporter.code_issue(declaration, text, details)
                continue
            if declaration is None or definition is None:
                # A declaration/definition pair is required for a merge.
                # This happens when all duplicates are definitions (already
                # reported above); there is nothing to merge into.
                continue
            self._members[definition.get_id()] = declaration
            declaration.merge_definition(definition)
            for compound in definition.get_parent_compounds():
                compound.replace_member(definition, declaration)

    def get_reporter(self):
        """Return reporter object to use for reporting issues.

        This method is used in the entity classes to access the reporter when
        they are parsing the XML files.
        """
        return self._reporter

    def get_xmlroot(self):
        """Return root of the Doxygen XML directory."""
        return self._xmlroot

    def get_compound(self, refid):
        """Return the compound with the given ID (KeyError if unknown)."""
        return self._compounds[refid]

    def get_member(self, refid):
        """Return the member with the given ID (KeyError if unknown)."""
        return self._members[refid]

    def get_compounds(self, types, predicate=None):
        """Return a list of compounds of the given type(s).

        types is passed to isinstance().  If predicate is given, only
        compounds for which it returns a true value are included.
        """
        result = []
        for compound in self._compounds.values():
            if isinstance(compound, types) and \
                    (predicate is None or predicate(compound)):
                result.append(compound)
        return result

    def get_members(self, types=None, predicate=None):
        """Return a list of unique members, optionally filtered.

        If types is given, it is passed to isinstance().  If predicate is
        given, only members for which it returns a true value are included.
        The order of the returned list is unspecified.
        """
        # self._members can contain duplicates because of merge_duplicates()
        result = set()
        for member in self._members.values():
            if (types is None or isinstance(member, types)) and \
                    (predicate is None or predicate(member)):
                result.add(member)
        return list(result)

    def get_files(self, paths=None):
        """Return files whose name ends with paths, or all files.

        paths is passed to str.endswith(), so it can be a string or a tuple
        of strings.  If it is empty or None, all files are returned.
        """
        if paths:
            return self.get_compounds(File, lambda x: x.get_name().endswith(paths))
        else:
            return self.get_compounds(File)

    def get_directories(self, paths):
        """Return directories whose name ends with paths (string or tuple)."""
        return self.get_compounds(Directory, lambda x: x.get_name().endswith(paths))

    def get_groups(self, name):
        """Return groups whose name is contained in the given collection."""
        return self.get_compounds(Group, lambda x: x.get_name() in name)

    def get_namespaces(self, name):
        """Return namespaces whose name is contained in the given collection."""
        return self.get_compounds(Namespace, lambda x: x.get_name() in name)

    def get_classes(self, name=None):
        """Return classes whose name is in the given collection, or all classes."""
        if name:
            return self.get_compounds(Class, lambda x: x.get_name() in name)
        else:
            return self.get_compounds(Class)

    def get_functions(self, name):
        """Return members whose name is contained in the given collection.

        NOTE(review): the filter type is Member, so this matches every kind
        of member with a matching name, not only functions -- confirm that
        this is intended.
        """
        return self.get_members(Member, lambda x: x.get_name() in name)
1204
1205 #####################################################################
1206 # Code for running in script mode
1207
def main():
    """Run the script for debugging/Doxygen XML output inspection.

    Parses the command line, loads the index and all detailed XML files
    from the directory given with -R/--root-dir, merges duplicate members,
    and calls show() on every entity selected with the --show-* options.
    Progress messages are written to stderr, and pending reporter messages
    are flushed after each stage.
    """
    import sys

    from optparse import OptionParser

    from reporter import Reporter

    parser = OptionParser()
    parser.add_option('-R', '--root-dir',
                      help='Doxygen XML root directory')
    parser.add_option('-F', '--show-file', action='append',
                      help='Show contents of given file')
    parser.add_option('-d', '--show-dir', action='append',
                      help='Show contents of given directory')
    parser.add_option('-g', '--show-group', action='append',
                      help='Show contents of given group')
    parser.add_option('-n', '--show-namespace', action='append',
                      help='Show contents of given namespace')
    parser.add_option('-c', '--show-class', action='append',
                      help='Show contents of given class')
    # TODO: Add option for other types, and make them work
    parser.add_option('-f', '--show-function', action='append',
                      help='Show details of given function')
    options = parser.parse_args()[0]
    if not options.root_dir:
        # Without this check, the missing value would only surface as a
        # traceback from os.path.join() while loading index.xml.
        parser.error('Doxygen XML root directory must be given with -R/--root-dir')

    reporter = Reporter()

    sys.stderr.write('Loading index.xml...\n')
    docset = DocumentationSet(options.root_dir, reporter)
    reporter.write_pending()
    sys.stderr.write('Loading details...\n')
    docset.load_details()
    reporter.write_pending()
    sys.stderr.write('Processing...\n')
    docset.merge_duplicates()
    reporter.write_pending()

    # The 'append' options are lists (or None if not given); the lookup
    # methods get tuples so that they work with str.endswith() and 'in'.
    objlist = []
    if options.show_file:
        objlist.extend(docset.get_files(tuple(options.show_file)))
    if options.show_dir:
        objlist.extend(docset.get_directories(tuple(options.show_dir)))
    if options.show_group:
        objlist.extend(docset.get_groups(tuple(options.show_group)))
    if options.show_namespace:
        # TODO: Replace file names with anonymous_namespace{filename}
        objlist.extend(docset.get_namespaces(tuple(options.show_namespace)))
    if options.show_class:
        objlist.extend(docset.get_classes(tuple(options.show_class)))
    if options.show_function:
        objlist.extend(docset.get_functions(tuple(options.show_function)))
    for obj in objlist:
        obj.show()
1262
# Run the inspection tool only when the file is executed as a script; merely
# importing the module does not call main().
if __name__ == '__main__':
    main()