49de76261c8bf72c27930099ecbd9499dbfd97b3
[alexxy/gromacs.git] / docs / doxygen / doxygenxml.py
1 #!/usr/bin/python
2 #
3 # This file is part of the GROMACS molecular simulation package.
4 #
5 # Copyright (c) 2014, by the GROMACS development team, led by
6 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 # and including many others, as listed in the AUTHORS file in the
8 # top-level source directory and at http://www.gromacs.org.
9 #
10 # GROMACS is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU Lesser General Public License
12 # as published by the Free Software Foundation; either version 2.1
13 # of the License, or (at your option) any later version.
14 #
15 # GROMACS is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 # Lesser General Public License for more details.
19 #
20 # You should have received a copy of the GNU Lesser General Public
21 # License along with GROMACS; if not, see
22 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24 #
25 # If you want to redistribute modifications to GROMACS, please
26 # consider that scientific software is very special. Version
27 # control is crucial - bugs must be traceable. We will be happy to
28 # consider code for inclusion in the official distribution, but
29 # derived work must not be called official GROMACS. Details are found
30 # in the README & COPYING files - if they are missing, get the
31 # official version at http://www.gromacs.org.
32 #
33 # To help us fund GROMACS development, we humbly ask that you cite
34 # the research papers on the package. Check out http://www.gromacs.org.
35
36 """Doxygen XML output parser.
37
38 This module implements a parser for the Doxygen XML output, converting it into
39 an object model that can be used to navigate the documentation.  It also uses
40 knowledge from how Doxygen works to provide access to things like visibility of
41 individual member documentation (e.g., based on what is the visibility of its
42 parent compound objects).
43
44 The object model is rooted at a DocumentationSet object.  Each documented
45 entity is modeled as an Entity, and this has subclasses Member and Compound to
46 correspond to the two categories of items that Doxygen handles.  These classes
47 are further subclassed to match each kind of entity that Doxygen produces.
48 Only kinds produced by Doxygen from C/C++ code are modeled.  Everything else
49 is ignored after a warning.
50
51 Currently the member entities are not completely parsed from the XML files, and
52 the interface may need additional work to provide convenient access to all
member types and their common properties.  For now, the focus is on modeling
the compound entities.
55
56 The implementation is mostly independent of any GROMACS-specific rules, except
57 for the following:
58  - DocType.library is a GROMACS-specific construct that is deduced from the
59    contents of the detailed description (presence of a \libinternal command in
60    the Doxygen comment triggers it).
61  - DocType.internal is deduced from the presence of a \internal command that
62    covers the whole detailed description.
63  - List of extensions for determining whether a file is a source file only
64    contains extensions actually used by GROMACS.
65 It would be possible to move these out from this file, but that would require
66 exposing the XML representation for the descriptions, which is not nice either.
67
68 The module can also be run as a script that can dump out different parts of the
69 object model.  This can be used to debug the parser, as well as check what is
70 actually in the XML documentation.
71 """
72
73 import os.path
74 import xml.etree.ElementTree as ET
75
76 import reporter
77
78 #####################################################################
79 # Helper functions and classes
80
81 def _show_list(title, objlist):
82     """Helper function for formatting a list of objects for debug output."""
83     if objlist:
84         print '{0}:'.format(title)
85         for obj in objlist:
86             print '  ', obj
87
class DocType(object):

    """Documentation visibility in the generated documentation.

    Each instance wraps an integer rank that indexes into _names.  Instances
    are ordered by that rank (none < internal < library < public), compare
    equal when their ranks are equal, and hash consistently with equality.
    """

    # Mapping to string representations for the internal integer values
    _names = ['undocumented', 'internal', 'library', 'public']

    def __init__(self, value):
        """Initialize a DocType instance.

        DocType.{none,internal,library,public} should be used outside the class
        instead of calling the constructor.
        """
        self._value = value

    def __str__(self):
        """Return string representation for the documentation type."""
        return self._names[self._value]

    # Rich comparisons are spelled out explicitly: __cmp__ is ignored by
    # Python 3, and Python 2 does not derive __ne__ from __eq__.

    def __eq__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value == other._value

    def __ne__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value != other._value

    def __lt__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value < other._value

    def __le__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value <= other._value

    def __gt__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value > other._value

    def __ge__(self, other):
        if not isinstance(other, DocType):
            return NotImplemented
        return self._value >= other._value

    def __hash__(self):
        """Hash by rank so that equal documentation types hash equally."""
        return hash(self._value)

    def __cmp__(self, other):
        """Order documentation types in the order of visibility.

        Kept for code that calls it explicitly; returns -1, 0 or 1 without
        relying on the cmp() builtin.
        """
        return (self._value > other._value) - (self._value < other._value)

# Static values for documentation types.
DocType.none = DocType(0)
DocType.internal = DocType(1)
DocType.library = DocType(2)
DocType.public = DocType(3)
116
class Location(object):

    """Location of a Doxygen entity.

    Holds the file path, line and column read from the attributes of a
    <location> tag in Doxygen XML.  It serves as the entity location where no
    body location is expected, and as one half of a LocationWithBody.
    """

    def __init__(self, elem):
        """Initialize location from a <location> element."""
        attributes = elem.attrib
        self.filepath = attributes['file']
        # Only the line is converted to an integer; the column is stored
        # exactly as it appears in the XML attribute.
        self.line = int(attributes['line'])
        self.column = attributes['column']

    def __str__(self):
        """Return the location as 'file:line'."""
        template = '{0}:{1}'
        return template.format(self.filepath, self.line)

    def get_reporter_location(self):
        """Return a reporter.Location for the same file and line."""
        path, line = self.filepath, self.line
        return reporter.Location(path, line)

    def get_full_string(self):
        """Return the location as 'file:line:column'."""
        template = '{0}:{1}:{2}'
        return template.format(self.filepath, self.line, self.column)
140
class BodyLocation(object):

    """Body location of a Doxygen entity.

    This class contains the logic to parse a body location from a <location>
    tag in Doxygen XML.  Not all entities have these attributes.
    This is only used as part of a LocationWithBody, which handles cases where
    the body location is optional.

    The body location can be compared and hashed so that it can be used in
    a dictionary for DocumentationSet.merge_duplicates().  Equality, ordering
    and hashing are all based on the (file, start line, end line) triple.
    """

    def __init__(self, elem):
        """Initialize body location from a <location> element."""
        self.filepath = elem.attrib['bodyfile']
        self.startline = int(elem.attrib['bodystart'])
        self.endline = int(elem.attrib['bodyend'])

    def _key(self):
        """Return the tuple that defines identity and ordering."""
        return (self.filepath, self.startline, self.endline)

    # Rich comparisons are spelled out explicitly: __cmp__ is ignored by
    # Python 3 (which would otherwise compare by identity and break the
    # dictionary use), and Python 2 does not derive __ne__ from __eq__.

    def __eq__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() != other._key()

    def __lt__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() < other._key()

    def __le__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() <= other._key()

    def __gt__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() > other._key()

    def __ge__(self, other):
        if not isinstance(other, BodyLocation):
            return NotImplemented
        return self._key() >= other._key()

    def __cmp__(self, other):
        """Three-way comparison by file path, start line, then end line.

        Kept for code that calls it explicitly; returns -1, 0 or 1 without
        relying on the cmp() builtin.
        """
        mine, theirs = self._key(), other._key()
        return (mine > theirs) - (mine < theirs)

    def __hash__(self):
        # Hashing the tuple (instead of XOR-ing the parts) keeps locations
        # with swapped or identical start/end lines from colliding.
        return hash(self._key())

    def __str__(self):
        return '{0}:{1}'.format(self.filepath, self.startline)

    def get_full_string(self):
        """Return 'file:start-end', or 'file:start' if the end is negative."""
        if self.endline < 0:
            return self.__str__()
        return '{0}:{1}-{2}'.format(self.filepath, self.startline, self.endline)
178
class LocationWithBody(object):

    """Location for a Doxygen entity that can have a body location.

    This class is used to represent the location of a Doxygen entity that can
    have a body location.  The main location is always present; the body
    location is present only if the <location> element has a 'bodyfile'
    attribute.
    """

    def __init__(self, elem):
        """Initialize location from a <location> element."""
        self._location = Location(elem)
        if 'bodyfile' in elem.attrib:
            self._bodylocation = BodyLocation(elem)
        else:
            self._bodylocation = None

    def __str__(self):
        if self._bodylocation is None:
            return '{0} (no body)'.format(self._location)
        return '{0} / {1}'.format(self._location, self._bodylocation)

    def get_reporter_location(self):
        """Return reporter location for this location.

        All issues are reported at the main location, which should match with
        the declaration, where most of the documentation typically is.
        """
        return self._location.get_reporter_location()

    def get_location(self):
        """Return the main location."""
        return self._location

    def get_body_location(self):
        """Return the body location, or None if there is none."""
        return self._bodylocation

    def has_same_body_location(self):
        """Check whether main location matches body location.

        If the main location is different, then it likely points to the
        declaration of the function.

        Returns False if there is no body location at all, since there is
        then nothing for the main location to match.
        """
        body = self._bodylocation
        if body is None:
            return False
        return self._location.filepath == body.filepath and \
                self._location.line == body.startline
223
class MemberSection(object):

    """Section of members within a compound entity.

    A section has a kind (used as its string representation) and keeps its
    members in the order in which they were added.
    """

    def __init__(self, kind):
        self._members = []
        self._kind = kind

    def __str__(self):
        return self._kind

    def add_member(self, member):
        """Append a member to the end of the section."""
        self._members.append(member)

    def replace_member(self, old, new):
        """Put new in place of the first occurrence of old.

        The section is left untouched if old is not one of its members.
        """
        members = self._members
        if old in members:
            members[members.index(old)] = new
244
245 #####################################################################
246 # Documentation entities
247
class Entity(object):

    """Doxygen documentation entity.

    This class represents common properties of an entity that can contain
    Doxygen documentation: its name and Doxygen ID, which kinds of
    description (brief, detailed, in-body) it has, and the visibility that
    is deduced from those descriptions.
    """

    def __init__(self, name, refid):
        """Initialize an entity that has no documentation loaded yet."""
        self._docset = None
        self._name = name
        self._id = refid
        self._has_brief_description = False
        self._has_detailed_description = False
        self._has_inbody_description = False
        self._visibility = DocType.none

    def __str__(self):
        return self._name

    def _get_reporter(self):
        """Return reporter to use for parsing issues."""
        return self._docset.get_reporter()

    def set_documentation_set(self, docset):
        """Set the documentation set this entity belongs to.

        The documentation set parent provides access to a common reporter
        object, and also allows the entity to resolve references to other
        entities while loading XML information.
        """
        assert self._docset is None
        self._docset = docset

    def get_id(self):
        return self._id

    def get_name(self):
        return self._name

    def get_reporter_location(self):
        """Return a reporter location that only identifies the entity name."""
        return reporter.Location('<{0}>'.format(self._name), None)

    def get_visibility(self):
        return self._visibility

    def is_documented(self):
        return self._visibility != DocType.none

    def has_brief_description(self):
        return self._has_brief_description

    def has_inbody_description(self):
        return self._has_inbody_description

    def _process_descriptions(self, briefelem, detailselem, inbodyelem):
        """Deduce description flags and visibility from description elements.

        Each argument is the corresponding description XML element, or None
        if the element was not present.  An element counts as a description
        only if it has at least one child element.

        Any non-empty brief or detailed description makes the entity public;
        the detailed description can then lower that to library or internal
        visibility as explained in the comments below.
        """
        # Named differently from the 'reporter' module to avoid shadowing it.
        issues = self._get_reporter()
        if briefelem is not None and len(briefelem) > 0:
            self._has_brief_description = True
            self._visibility = DocType.public
        if detailselem is not None and len(detailselem) > 0:
            self._visibility = DocType.public
            # Gromacs-specific:
            # \internal is used at the beginning of a comment block to
            # mark the block internal to the module.
            # \libinternal is used similarly, and inserts custom XML
            # elements.
            if detailselem[0].tag == 'internal':
                if len(detailselem) == 1:
                    self._visibility = DocType.internal
                else:
                    # TODO: Should we also check if internal appears elsewhere?
                    issues.doc_note(self, '\\internal does not cover whole documentation')
            if detailselem[0].find('libinternal') is not None:
                if self._visibility == DocType.public:
                    self._visibility = DocType.library
                else:
                    issues.doc_error(self, '\\libinternal should not be used inside \\internal')
            self._has_detailed_description = True
        if inbodyelem is not None:
            self._has_inbody_description = (len(inbodyelem) > 0)

    def show_base(self):
        """Format information for common properties.

        This is called from subclass show() methods to show base information
        about the entity.
        """
        print('ID:         {0}'.format(self._id))
        print('Name:       {0}'.format(self._name))
        print('Location:   {0}'.format(self.get_reporter_location()))
        doctype = []
        if self._has_brief_description:
            doctype.append('brief')
        if self._has_detailed_description:
            doctype.append('details')
        if self._has_inbody_description:
            doctype.append('in-body')
        if not doctype:
            doctype.append('none')
        print('Doc:        {0}'.format(', '.join(doctype)))
        print('Visibility: {0}'.format(self._visibility))
350
351 # Member entities
352
class Member(Entity):

    """Member entity.

    In Doxygen, a member entity is an entity such as a function or an enum that
    cannot contain other documented entities (an enum is a slight exception, as
    enum values are still nested within the enum member).  A member always
    belongs to one (or more) compounds, which means that the detailed
    documentation for the member appears on the documentation page for that
    compound.  If none of the parent compounds are documented, the member
    doesn't appear anywhere, even if it is documented.

    Member information is loaded from a parent compound's XML file.  If there
    is more than one parent, the first one encountered will be used
    (presumably, Doxygen duplicates the information into each XML file).
    """

    def __init__(self, name, refid):
        Entity.__init__(self, name, refid)
        self._parents = set()
        self._class = None
        self._namespace = None
        self._files = set()
        self._group = None
        self._location = None
        self._alternates = set()
        self._loaded = False
        # TODO: Move to Entity?
        self._xmlpath = None

    def add_parent_compound(self, compound):
        """Add a compound that contains this member.

        A member can be in any number of files, but in at most one class,
        one namespace and one group; this is enforced with assertions.
        """
        self._parents.add(compound)
        if isinstance(compound, Class):
            assert self._class is None
            self._class = compound
        elif isinstance(compound, Namespace):
            assert self._namespace is None
            self._namespace = compound
        elif isinstance(compound, File):
            self._files.add(compound)
        elif isinstance(compound, Group):
            assert self._group is None
            self._group = compound
        else:
            assert False

    def merge_definition(self, definition):
        """Merge another member into this.

        See DocumentationSet.merge_duplicates().
        """
        assert self._class is None
        assert definition._class is None
        assert self._group == definition._group
        assert self._namespace == definition._namespace
        self._parents.update(definition._parents)
        self._files.update(definition._files)
        self._alternates.add(definition)

    def load_details_from_element(self, rootelem, xmlpath):
        """Load details for the member from a given XML element.

        This method is called when encountering member definitions while
        processing a compound XML file to load the information for that member.
        It processes common properties for a member, and delegates other
        elements to _load_element().

        Only the first call does anything; later calls return immediately.
        """
        if self._loaded:
            # TODO: It would be nice to verify that the same information
            # is present in all instances
            return
        self._xmlpath = xmlpath
        # TODO: Process the attributes
        # Named differently from the 'reporter' module to avoid shadowing it.
        issues = self._get_reporter()
        briefelem = None
        detailselem = None
        inbodyelem = None
        for elem in rootelem:
            if elem.tag == 'name':
                if elem.text != self.get_name():
                    issues.xml_assert(xmlpath,
                            "member name mismatch: '{0}' (in index.xml) vs. '{1}'".format(
                                self.get_name(), elem.text))
            elif elem.tag == 'briefdescription':
                briefelem = elem
            elif elem.tag == 'detaileddescription':
                detailselem = elem
            elif elem.tag == 'inbodydescription':
                # TODO: in-body description is probably only possible for
                # functions; move it there.
                inbodyelem = elem
            elif elem.tag == 'location':
                self._location = LocationWithBody(elem)
            else:
                if not self._load_element(elem):
                    # TODO Process the rest of the elements so that we can check this
                    #reporter.xml_assert(xmlpath,
                    #        "unknown member child element '{0}'".format(elem.tag))
                    pass
        self._process_descriptions(briefelem, detailselem, inbodyelem)
        self._loaded = True

    def _load_element(self, element):
        """Load data from a child XML element.

        This method is called for all XML elements under the <memberdef>
        element that are not handled directly by the Member class.
        Derived classes should return True if they process the element.
        """
        return False

    def _get_raw_location(self):
        """Returns the LocationWithBody object associated with this member.

        This is necessary so that EnumValue can override it to report a
        non-empty location: Doxygen doesn't provide any location for
        <enumvalue>.
        """
        return self._location

    def get_reporter_location(self):
        return self._get_raw_location().get_reporter_location()

    def get_location(self):
        """Return main location for the member.

        This typically corresponds to the declaration.
        """
        return self._get_raw_location().get_location()

    def get_body_location(self):
        """Return location of the body for the member.

        Some types of members do not have a body location, in which case this
        returns None.
        """
        return self._get_raw_location().get_body_location()

    def has_same_body_location(self):
        """Check whether the main location is the same as body location."""
        return self._get_raw_location().has_same_body_location()

    def get_namespace(self):
        return self._namespace

    def get_parent_compounds(self):
        return self._parents

    def get_inherited_visibility(self):
        """Return the highest visibility among the parent compounds."""
        return max([parent.get_visibility() for parent in self._parents])

    def show(self):
        """Print debug information about the member."""
        self.show_base()
        if self._alternates:
            idlist = [x.get_id() for x in self._alternates]
            print('Alt. IDs:   {0}'.format(', '.join(idlist)))
        print('Parent vis: {0}'.format(self.get_inherited_visibility()))
        print('Location:   {0}'.format(self.get_location().get_full_string()))
        # Not every member has a body location; do not crash the dump on those.
        bodylocation = self.get_body_location()
        if bodylocation is None:
            print('Body loc:   (none)')
        else:
            print('Body loc:   {0}'.format(bodylocation.get_full_string()))
        _show_list('Parents', self._parents)
512         _show_list('Parents', self._parents)
513
class Define(Member):
    # Adds nothing to Member.  Presumably models Doxygen members of kind
    # 'define'; the kind-to-class mapping is not visible here -- confirm.
    pass
516
class Variable(Member):
    # Adds nothing to Member.  Presumably models Doxygen members of kind
    # 'variable'; the kind-to-class mapping is not visible here -- confirm.
    pass
519
class Typedef(Member):
    # Adds nothing to Member.  Presumably models Doxygen members of kind
    # 'typedef'; the kind-to-class mapping is not visible here -- confirm.
    pass
522
class Enum(Member):

    """Enum member, which also keeps track of its enumeration values."""

    def __init__(self, name, refid):
        Member.__init__(self, name, refid)
        self._values = set()

    def _load_element(self, elem):
        """Load an <enumvalue> child element; other elements are not handled.

        The value member is looked up from the documentation set by its ID,
        attached to this enum, and loaded from the element.
        """
        if elem.tag != 'enumvalue':
            return False
        value_id = elem.attrib['id']
        # Doxygen seems to sometimes assign the same ID to a singleton enum
        # value (this already triggers a warning in loading index.xml).
        # Such a value is treated as handled, but is not recorded.
        if value_id != self.get_id():
            value = self._docset.get_member(value_id)
            value.set_enum(self)
            value.load_details_from_element(elem, self._xmlpath)
            self._values.add(value)
        return True

    def get_values(self):
        """Return the set of value members loaded for this enum."""
        return self._values
544
class EnumValue(Member):

    """Single value of an enum; its location is taken from the owning enum."""

    def __init__(self, name, refid):
        Member.__init__(self, name, refid)
        self._enum = None

    def set_enum(self, member):
        """Set the enum that owns this value; this can be done only once."""
        assert self._enum is None
        self._enum = member

    def _get_raw_location(self):
        """Return the raw location of the owning enum."""
        owner = self._enum
        return owner._get_raw_location()
556
class Function(Member):
    # Adds nothing to Member.  Presumably models Doxygen members of kind
    # 'function'; the kind-to-class mapping is not visible here -- confirm.
    pass
559
class FriendDeclaration(Member):
    # Adds nothing to Member.  Presumably models friend declarations reported
    # by Doxygen; the kind-to-class mapping is not visible here -- confirm.
    pass
562
563 # Compound entities
564
class Compound(Entity):

    """Compound entity.

    In Doxygen, a compound entity is an entity that has its own documentation
    page, and can contain other documented entities (either members, or other
    compounds).  Examples of compounds are files and classes.
    A compound entity always appears in the documentation, even if it is
    contained in another compound that is not documented.

    The list of members for a compound is initialized when the XML index file
    is read.  All other information is loaded from an XML file that is specific
    to the compound.  In addition to describing the compound, this XML file
    contains references to contained compounds, and details of all members
    within the compound.
    """

    def __init__(self, name, refid):
        Entity.__init__(self, name, refid)
        self._members = dict()
        self._children = set()
        self._sections = []
        self._groups = set()
        self._loaded = False

    def _get_xml_path(self):
        """Return path to the details XML file for this compound."""
        return os.path.join(self._docset.get_xmlroot(), self.get_id() + '.xml')

    def add_member(self, member):
        """Add a contained member."""
        self._members[member.get_id()] = member

    def add_group(self, compound):
        """Add a group (a compound entity) that contains this entity."""
        self._groups.add(compound)

    def replace_member(self, old, new):
        """Replace a contained member with another one.

        The new member is stored under the ID of the old member, and every
        member section is asked to replace old with new as well.

        Raises ValueError if old is not a member of this compound, or if the
        ID of new is already in use within this compound.
        """
        if old.get_id() not in self._members:
            raise ValueError("Trying to replace a non-existent member")
        elif new.get_id() in self._members:
            raise ValueError("Trying to replace with an existing member")
        self._members[old.get_id()] = new
        for section in self._sections:
            section.replace_member(old, new)

    def load_details(self):
        """Load details for the compound from its details XML file.

        This method processes common properties for a compound.
        References to inner compounds are delegated to _load_inner_*() methods,
        and all members encountered in the XML file are loaded with
        Member.load_details_from_element().
        Other elements are delegated to _load_element().

        Only the first successful call loads anything; later calls return
        immediately.  Problems in the XML structure are passed to the
        reporter.
        """
        if self._loaded:
            return
        # Named differently from the 'reporter' module to avoid shadowing it.
        issues = self._get_reporter()
        xmlpath = self._get_xml_path()
        compoundtree = ET.parse(xmlpath)
        root = compoundtree.getroot()
        if len(root) == 0:
            # Without this check, root[0] below would raise IndexError.
            issues.xml_assert(xmlpath, "expected <compounddef> as the first tag")
            return
        if len(root) > 1:
            issues.xml_assert(xmlpath, "more than one compound in a file")
        if root[0].tag != 'compounddef':
            issues.xml_assert(xmlpath, "expected <compounddef> as the first tag")
            return
        briefelem = None
        detailselem = None
        # Members listed in the index that have not yet been seen in any
        # section of this file.
        missing_members = set(self._members.values())
        for elem in root[0]:
            if elem.tag == 'compoundname':
                if elem.text != self.get_name():
                    issues.xml_assert(xmlpath,
                            "compound name mismatch: '{0}' (in index.xml) vs. '{1}'"
                            .format(self.get_name(), elem.text))
            elif elem.tag == 'briefdescription':
                briefelem = elem
            elif elem.tag == 'detaileddescription':
                detailselem = elem
            elif elem.tag in ('includes', 'includedby', 'incdepgraph',
                    'invincdepgraph', 'inheritancegraph', 'collaborationgraph',
                    'programlisting', 'templateparamlist', 'listofallmembers'):
                # Known elements that are deliberately not processed.
                pass
            elif elem.tag.startswith('inner'):
                refid = elem.attrib['refid']
                # The part of the tag after 'inner' names the compound type.
                reftype = elem.tag[5:]
                # TODO: Handle 'prot' attribute?
                refcompound = self._docset.get_compound(refid)
                self._children.add(refcompound)
                if reftype == 'file':
                    self._load_inner_file(refcompound)
                elif reftype == 'dir':
                    self._load_inner_dir(refcompound)
                elif reftype == 'group':
                    self._load_inner_group(refcompound)
                elif reftype == 'namespace':
                    self._load_inner_namespace(refcompound)
                elif reftype == 'class':
                    self._load_inner_class(refcompound)
                else:
                    issues.xml_assert(xmlpath,
                            "unknown inner compound type '{0}'".format(reftype))
            elif elem.tag == 'sectiondef':
                # TODO: Handle header and description elements
                kind = elem.attrib['kind']
                section = MemberSection(kind)
                self._sections.append(section)
                for memberelem in elem.iter('memberdef'):
                    refid = memberelem.attrib['id']
                    member = self._members[refid]
                    member.load_details_from_element(memberelem, xmlpath)
                    section.add_member(member)
                    missing_members.discard(member)
                    # Enum values need special handling, but are not worth
                    # extra generalization.
                    if isinstance(member, Enum):
                        missing_members.difference_update(member.get_values())
            else:
                if not self._load_element(elem):
                    issues.xml_assert(xmlpath,
                            "unknown compound child element '{0}'".format(elem.tag))
        if missing_members:
            issues.xml_assert(xmlpath, 'members without section')
        self._process_descriptions(briefelem, detailselem, None)
        self._loaded = True

    def _unexpected_inner_compound(self, typename, compound):
        """Report a parsing error for an unexpected inner compound reference."""
        issues = self._get_reporter()
        xmlpath = self._get_xml_path()
        issues.xml_assert(xmlpath,
                "unexpected inner {0}: {1}".format(typename, compound))

    def _load_inner_file(self, compound):
        """Process a reference to an inner file.

        Derived classes should override the method if the compound type can
        contain files as nested compounds.
        """
        self._unexpected_inner_compound("file", compound)

    def _load_inner_dir(self, compound):
        """Process a reference to an inner directory.

        Derived classes should override the method if the compound type can
        contain directories as nested compounds.
        """
        self._unexpected_inner_compound("dir", compound)

    def _load_inner_group(self, compound):
        """Process a reference to an inner group.

        Derived classes should override the method if the compound type can
        contain groups as nested compounds.
        """
        self._unexpected_inner_compound("group", compound)

    def _load_inner_namespace(self, compound):
        """Process a reference to an inner namespace.

        Derived classes should override the method if the compound type can
        contain namespaces as nested compounds.
        """
        self._unexpected_inner_compound("namespace", compound)

    def _load_inner_class(self, compound):
        """Process a reference to an inner class.

        Derived classes should override the method if the compound type can
        contain classes as nested compounds.
        """
        self._unexpected_inner_compound("class", compound)

    def _load_element(self, element):
        """Load data from a child XML element.

        This method is called for all XML elements under the <compounddef>
        element that are not handled directly by the Compound class.
        Derived classes should return True if they process the element.
        """
        return False

    def get_groups(self):
        return self._groups

    def show_base(self):
        """Format information for common properties.

        This extends Entity.show_base() by adding properties that are common to
        all compounds.
        """
        Entity.show_base(self)
        if self._groups:
            print('Groups:   {0}'.format(', '.join(map(str, self._groups))))

    def show_members(self):
        """Show list of members.

        This method is provided for use in show() methods of derived classes
        to print the list of members.
        """
        for section in self._sections:
            print('Member section: {0}'.format(section))
            for member in section._members:
                # Three spaces: two from the indent string and one from the
                # separator that "print '  ', member" inserted.
                print('   {0}'.format(member))
771
class File(Compound):

    """File compound.

    In addition to the common compound properties, a file knows its path,
    the directory that contains it, the classes and namespaces that its XML
    file references as inner compounds, and whether it is a source file.
    """

    def __init__(self, name, refid):
        Compound.__init__(self, name, refid)
        self._path = None
        self._directory = None
        self._classes = set()
        self._namespaces = set()
        self._is_source_file = None

    def _load_inner_class(self, compound):
        """Record an inner class and register this file with it."""
        compound.add_file(self)
        self._classes.add(compound)

    def _load_inner_namespace(self, compound):
        """Record an inner namespace and register this file with it."""
        compound.add_file(self)
        self._namespaces.add(compound)

    def _load_element(self, elem):
        """Load the file path from a <location> element.

        Returns True if the element was a <location> element, and False for
        all other elements.
        """
        if elem.tag == 'location':
            self._path = elem.attrib['file']
            extension = os.path.splitext(self._path)[1]
            # Only extensions actually used by GROMACS are recognized.
            self._is_source_file = (extension in ('.c', '.cpp', '.cu'))
            return True
        return False

    def set_directory(self, directory):
        self._directory = directory

    def get_reporter_location(self):
        """Return a reporter location for the file path, without a line."""
        return reporter.Location(self._path, None)

    def get_path(self):
        return self._path

    def get_directory(self):
        return self._directory

    def is_source_file(self):
        """Return whether this is a source file.

        The value is None until a <location> element has been loaded.
        """
        return self._is_source_file

    def show(self):
        """Print debug information about the file."""
        self.show_base()
        print('Path:      {0}'.format(self._path))
        print('Directory: {0}'.format(self._directory))
        print('Source:    {0}'.format(self._is_source_file))
        _show_list('Namespaces', self._namespaces)
        _show_list('Classes', self._classes)
        self.show_members()
819         self.show_members()
820
class Directory(Compound):

    """Compound that represents a directory.

    Keeps the path read from the <location> element, the parent directory
    and the files and subdirectories referenced as inner compounds.
    """

    def __init__(self, name, refid):
        Compound.__init__(self, name, refid)
        # Filled in by _load_element() from the <location> element.
        self._path = None
        # Set by the parent directory in its _load_inner_dir().
        self._parent = None
        self._files = set()
        self._subdirs = set()

    def _load_inner_dir(self, compound):
        """Record a subdirectory and make this directory its parent."""
        compound._parent = self
        self._subdirs.add(compound)

    def _load_inner_file(self, compound):
        """Record a file and set this directory as its directory."""
        compound.set_directory(self)
        self._files.add(compound)

    def _load_element(self, elem):
        """Read the path from a <location> element.

        Returns True if the element was a <location> element, False for any
        other element.
        """
        if elem.tag != 'location':
            return False
        self._path = elem.attrib['file']
        return True

    def get_reporter_location(self):
        """Return a reporter location for the path (no line number)."""
        return reporter.Location(self._path, None)

    def get_path(self):
        """Return the path read from the <location> element."""
        return self._path

    def get_parent(self):
        """Return the parent directory, or None if none has been set."""
        return self._parent

    def get_subdirectories(self):
        """Return the set of subdirectories."""
        return self._subdirs

    def show(self):
        """Print the properties and contents of the directory."""
        self.show_base()
        print('Path:      {0}'.format(self._path))
        parent = self._parent
        if parent:
            print('Parent:    {0}'.format(parent))
        _show_list('Subdirectories', self._subdirs)
        _show_list('Files', self._files)
862
class Group(Compound):

    """Compound that represents a Doxygen group.

    Keeps the title read from the <title> element and the files, nested
    groups, namespaces and classes referenced as inner compounds.  Each of
    those compounds is told about this group through add_group().
    """

    def __init__(self, name, refid):
        Compound.__init__(self, name, refid)
        # Filled in by _load_element() from the <title> element.
        self._title = None
        self._files = set()
        self._nestedgroups = set()
        self._namespaces = set()
        self._classes = set()

    def _load_inner_file(self, compound):
        """Record a file that belongs to this group."""
        compound.add_group(self)
        self._files.add(compound)

    # Doxygen 1.8.5 doesn't seem to put the directories into the XML output,
    # even though they are in the HTML output as group members...

    def _load_inner_group(self, compound):
        """Record a group nested in this group."""
        compound.add_group(self)
        self._nestedgroups.add(compound)

    def _load_inner_namespace(self, compound):
        """Record a namespace that belongs to this group."""
        compound.add_group(self)
        self._namespaces.add(compound)

    def _load_inner_class(self, compound):
        """Record a class that belongs to this group."""
        compound.add_group(self)
        self._classes.add(compound)

    def _load_element(self, elem):
        """Read the group title from a <title> element.

        Returns True if the element was a <title> element, False for any
        other element.
        """
        if elem.tag != 'title':
            return False
        self._title = elem.text
        return True

    def show(self):
        """Print the title, inner compounds and members of the group."""
        self.show_base()
        print('Title:     {0}'.format(self._title))
        print('Inner compounds:')
        for child in self._children:
            # Three spaces: the two-space prefix plus the separator that
            # the print statement inserted between its two arguments.
            print('   ' + str(child))
        self.show_members()
904
class Namespace(Compound):

    """Compound that represents a namespace.

    Keeps the location read from the <location> element, the files that
    registered themselves through add_file(), the parent namespace and the
    inner namespaces and classes.
    """

    def __init__(self, name, refid):
        Compound.__init__(self, name, refid)
        # Filled in by _load_element() from the <location> element.
        self._doclocation = None
        # Set by the enclosing namespace in its _load_inner_namespace().
        self._parent = None
        self._files = set()
        self._innernamespaces = set()
        self._classes = set()

    def _load_inner_namespace(self, compound):
        """Record an inner namespace and make this namespace its parent."""
        compound._parent = self
        self._innernamespaces.add(compound)

    def _load_inner_class(self, compound):
        """Record an inner class and set this namespace on it."""
        compound.set_namespace(self)
        self._classes.add(compound)

    def _load_element(self, elem):
        """Read the location from a <location> element.

        Returns True if the element was a <location> element, False for any
        other element.
        """
        if elem.tag != 'location':
            return False
        self._doclocation = Location(elem)
        return True

    def add_file(self, compound):
        """Add a file to the set of files kept for this namespace."""
        self._files.add(compound)

    def get_reporter_location(self):
        """Return the reporter location of the loaded location.

        Requires that a <location> element has been loaded.
        """
        doclocation = self._doclocation
        return doclocation.get_reporter_location()

    def is_anonymous(self):
        """Return whether the name contains 'anonymous_namespace{'."""
        name = self.get_name()
        return 'anonymous_namespace{' in name

    def show(self):
        """Print the properties, inner compounds and members."""
        self.show_base()
        print('Doc. loc.: {0}'.format(self._doclocation.get_full_string()))
        _show_list('Inner namespaces', self._innernamespaces)
        _show_list('Classes', self._classes)
        self.show_members()
943
class Class(Compound):

    """Compound that represents a class-like entity.

    Keeps the location read from the <location> element, the namespace and
    outer class set by the enclosing compounds, the files that registered
    themselves through add_file(), and the base, derived and inner classes.
    """

    def __init__(self, name, refid):
        Compound.__init__(self, name, refid)
        # Filled in by _load_element() from the <location> element.
        self._location = None
        # Set by the enclosing namespace/class when it loads its details.
        self._namespace = None
        self._outerclass = None
        self._files = set()
        self._baseclasses = []
        self._derivedclasses = set()
        self._innerclasses = set()

    def _load_inner_class(self, compound):
        """Record an inner class and make this class its outer class."""
        compound.set_outer_class(self)
        self._innerclasses.add(compound)

    def _load_element(self, elem):
        """Load base/derived class references and the class location.

        Returns True for <basecompoundref>, <derivedcompoundref> and
        <location> elements, False for any other element.  A reference
        without a 'refid' attribute is consumed but not recorded, because
        there is no compound to look up for it.
        """
        tag = elem.tag
        if tag in ('basecompoundref', 'derivedcompoundref'):
            # TODO: Handle unknown bases?
            refid = elem.attrib.get('refid')
            if refid is not None:
                # TODO: Handle prot and virt attributes, check name?
                compound = self._docset.get_compound(refid)
                if tag == 'basecompoundref':
                    self._baseclasses.append(compound)
                else:
                    self._derivedclasses.add(compound)
            return True
        if tag == 'location':
            self._location = LocationWithBody(elem)
            return True
        return False

    def add_file(self, compound):
        """Add a file to the set of files kept for this class."""
        self._files.add(compound)

    def set_namespace(self, compound):
        """Set the namespace that contains this class."""
        self._namespace = compound

    def set_outer_class(self, compound):
        """Set the class that contains this class."""
        self._outerclass = compound

    def get_reporter_location(self):
        """Return the reporter location of the loaded location.

        Requires that a <location> element has been loaded.
        """
        return self._location.get_reporter_location()

    def get_files(self):
        """Return the set of files added with add_file()."""
        return self._files

    def is_local(self):
        """Return whether the class is confined to at most one source file.

        The result is False if more than one file has been added, or if the
        single file is not a source file; it is True if no file has been
        added at all.
        """
        if len(self._files) > 1:
            return False
        return all(fileobj.is_source_file() for fileobj in self._files)

    def show(self):
        """Print the properties, inner classes and members of the class.

        Requires that a <location> element has been loaded.
        """
        self.show_base()
        print('Namespace:  {0}'.format(self._namespace))
        if self._outerclass:
            print('Outer cls:  {0}'.format(self._outerclass))
        location = self._location
        print('Location:   {0}'.format(location.get_location().get_full_string()))
        print('Body loc:   {0}'.format(location.get_body_location().get_full_string()))
        _show_list('Inner classes', self._innerclasses)
        self.show_members()
1012
1013 #####################################################################
1014 # Top-level container class
1015
def _get_compound_type_from_kind(kind):
    """Return the internal class for a Doxygen XML compound kind.

    Classes, structs and unions all map to Class.  None is returned for
    kinds that have no internal class.
    """
    if kind == 'file':
        return File
    if kind == 'dir':
        return Directory
    if kind == 'group':
        return Group
    if kind == 'namespace':
        return Namespace
    if kind in ('class', 'struct', 'union'):
        return Class
    return None
1030
def _get_member_type_from_kind(kind):
    """Return the internal class for a Doxygen XML member kind.

    None is returned for kinds that have no internal class.
    """
    if kind == 'define':
        return Define
    if kind == 'variable':
        return Variable
    if kind == 'typedef':
        return Typedef
    if kind == 'enum':
        return Enum
    if kind == 'enumvalue':
        return EnumValue
    if kind == 'function':
        return Function
    if kind == 'friend':
        return FriendDeclaration
    return None
1049
class DocumentationSet(object):

    """Root object for Doxygen XML documentation tree.

    On initialization, it reads the index.xml file from the Doxygen XML output,
    which contains the list of entities.  Only the ID and name for the entities,
    and the parent compounds for members, are available from this file.

    load_details() can be called to load the detailed compound XML files.
    This constructs relations between compound entities, and initializes other
    attributes for the entities.

    load_file_details() does the same as load_details(), except that it leaves
    those compound XML files unloaded that do not affect file objects or their
    parent hierarchy.  This saves some time if details for actual code
    constructs like namespaces, classes or members are not necessary.

    merge_duplicates() can then be called to remove members with different IDs,
    but that actually reference the same code entity.  For some reason, Doxygen
    seems to produce these in certain cases.
    """

    def __init__(self, xmlroot, reporter):
        """Initialize the documentation set and read index data.

        xmlroot is the directory that contains index.xml.  reporter is the
        object that receives xml_assert() calls for index entries that
        cannot be modelled; such entries are skipped.
        """
        self._xmlroot = xmlroot
        self._reporter = reporter
        xmlpath = os.path.join(xmlroot, 'index.xml')
        indextree = ET.parse(xmlpath)
        self._compounds = dict()
        self._members = dict()
        self._files = dict()
        for compoundelem in indextree.getroot():
            name = compoundelem.find('name').text
            refid = compoundelem.attrib['refid']
            kind = compoundelem.attrib['kind']
            if kind in ('page', 'example'):
                # TODO: Model these types as well
                continue
            compoundtype = _get_compound_type_from_kind(kind)
            if compoundtype is None:
                reporter.xml_assert(xmlpath,
                        "unknown compound kind '{0}'".format(kind))
                continue
            compound = compoundtype(name, refid)
            compound.set_documentation_set(self)
            self._compounds[refid] = compound
            for memberelem in compoundelem.iter('member'):
                membername = memberelem.find('name').text
                memberid = memberelem.attrib['refid']
                memberkind = memberelem.attrib['kind']
                # Resolve the kind before looking at existing members, so
                # that an unknown kind is reported instead of being passed
                # to isinstance() as None.
                membertype = _get_member_type_from_kind(memberkind)
                if membertype is None:
                    reporter.xml_assert(xmlpath,
                            "unknown member kind '{0}'".format(memberkind))
                    continue
                if memberid in self._members:
                    # The same member can be listed under several compounds.
                    member = self._members[memberid]
                    if not isinstance(member, membertype):
                        reporter.xml_assert(xmlpath,
                                "id '{0}' used for multiple kinds of members"
                                .format(memberid))
                        continue
                else:
                    member = membertype(membername, memberid)
                    member.set_documentation_set(self)
                    self._members[memberid] = member
                member.add_parent_compound(compound)
                compound.add_member(member)

    def load_file_details(self):
        """Load detailed XML files for all files and possible parents of files."""
        for compound in self._compounds.values():
            if isinstance(compound, (File, Directory, Group)):
                compound.load_details()
                if isinstance(compound, File):
                    self._files[compound.get_path()] = compound

    def load_details(self):
        """Load detailed XML files for each compound."""
        for compound in self._compounds.values():
            compound.load_details()
            if isinstance(compound, File):
                self._files[compound.get_path()] = compound
        # TODO: Add links to files using location

    def merge_duplicates(self):
        """Merge duplicate member definitions based on body location.

        At least for some functions that are declared in a header, but have
        their body in a source file, Doxygen seems to create two different IDs,
        but the contents of the members are the same, except for the location
        attribute.  This method merges members that have identical name and
        body location into a single member that keeps the information from both
        instances (they should only differ in the location attribute and in
        parent compounds).  Both IDs point to the merged member after this
        method.

        Groups of members that cannot be paired into one declaration and one
        definition are reported through the reporter and left unmerged.
        """
        members_by_body = dict()
        for member in self._members.values():
            bodyloc = member.get_body_location()
            if bodyloc:
                index = (bodyloc, type(member), member.get_name())
                members_by_body.setdefault(index, []).append(member)
        for memberlist in members_by_body.values():
            if len(memberlist) <= 1:
                continue
            declaration = None
            otherdeclarations = []
            definition = None
            for member in memberlist:
                if member.has_same_body_location():
                    if definition is not None:
                        self._reporter.xml_assert(None,
                                "duplicate definition for a member '{0}'"
                                .format(definition))
                        continue
                    definition = member
                elif declaration is None:
                    declaration = member
                else:
                    otherdeclarations.append(member)
            if otherdeclarations:
                # TODO: gmx_cpuid.c produces some false positives
                details = []
                for otherdeclaration in otherdeclarations:
                    details.append('{0}: another declaration is here'
                            .format(otherdeclaration.get_reporter_location()))
                details.append('{0}: definition is here'
                        .format(declaration.get_body_location()))
                text = "duplicate declarations for a member '{0}'".format(declaration)
                self._reporter.code_issue(declaration, text, details)
                continue
            if declaration is None or definition is None:
                # Merging needs one declaration and one definition.  When
                # every member of the group is a definition, the duplicate
                # has already been reported above and there is nothing to
                # merge into.
                continue
            self._members[definition.get_id()] = declaration
            declaration.merge_definition(definition)
            for compound in definition.get_parent_compounds():
                compound.replace_member(definition, declaration)

    def get_reporter(self):
        """Return reporter object to use for reporting issues.

        This method is used in the entity classes to access the reporter when
        they are parsing the XML files.
        """
        return self._reporter

    def get_xmlroot(self):
        """Return root of the Doxygen XML directory."""
        return self._xmlroot

    def get_compound(self, refid):
        """Return the compound with the given ID (KeyError if unknown)."""
        return self._compounds[refid]

    def get_member(self, refid):
        """Return the member with the given ID (KeyError if unknown)."""
        return self._members[refid]

    def get_compounds(self, types, predicate=None):
        """Return a list of compounds of the given type(s).

        types is passed to isinstance().  If predicate is given, only those
        compounds for which it returns a true value are included.
        """
        return [compound for compound in self._compounds.values()
                if isinstance(compound, types) and
                (predicate is None or predicate(compound))]

    def get_members(self, types=None, predicate=None):
        """Return a list of members, each member included only once.

        If types is given, it is passed to isinstance() to select members.
        If predicate is given, only those members for which it returns a
        true value are included.
        """
        # self._members can contain duplicates because of merge_duplicates()
        result = set()
        for member in self._members.values():
            if (types is None or isinstance(member, types)) and \
                    (predicate is None or predicate(member)):
                result.add(member)
        return list(result)

    def get_files(self, paths=None):
        """Return files whose name ends with paths, or all files.

        paths is passed to str.endswith(), so it can be a string or a tuple
        of strings.
        """
        if paths:
            return self.get_compounds(File, lambda x: x.get_name().endswith(paths))
        else:
            return self.get_compounds(File)

    def get_directories(self, paths):
        """Return directories whose name ends with paths.

        paths is passed to str.endswith(), so it can be a string or a tuple
        of strings.
        """
        return self.get_compounds(Directory, lambda x: x.get_name().endswith(paths))

    def get_groups(self, name):
        """Return groups whose name is contained in name.

        The test is 'group name in name'; main() passes a tuple of names.
        """
        return self.get_compounds(Group, lambda x: x.get_name() in name)

    def get_namespaces(self, name=None):
        """Return namespaces whose name is contained in name, or all of them.

        The test is 'namespace name in name'; main() passes a tuple of names.
        """
        if name:
            return self.get_compounds(Namespace, lambda x: x.get_name() in name)
        else:
            return self.get_compounds(Namespace)

    def get_classes(self, name=None):
        """Return classes whose name is contained in name, or all of them.

        The test is 'class name in name'; main() passes a tuple of names.
        """
        if name:
            return self.get_compounds(Class, lambda x: x.get_name() in name)
        else:
            return self.get_compounds(Class)

    def get_functions(self, name):
        """Return members whose name is contained in name.

        Members are selected with the Member type, so the result is not
        limited to Function objects.
        """
        return self.get_members(Member, lambda x: x.get_name() in name)
1250
1251 #####################################################################
1252 # Code for running in script mode
1253
def main():
    """Run the script for debugging/Doxygen XML output inspection.

    Loads the Doxygen XML output from the directory given with -R, merges
    duplicate members, and then calls show() on every entity selected with
    the --show-* options.  Exits with a usage error if -R is not given.
    """
    import sys

    from optparse import OptionParser

    from reporter import Reporter

    parser = OptionParser()
    parser.add_option('-R', '--root-dir',
                      help='Doxygen XML root directory')
    parser.add_option('-F', '--show-file', action='append',
                      help='Show contents of given file')
    parser.add_option('-d', '--show-dir', action='append',
                      help='Show contents of given directory')
    parser.add_option('-g', '--show-group', action='append',
                      help='Show contents of given group')
    parser.add_option('-n', '--show-namespace', action='append',
                      help='Show contents of given namespace')
    parser.add_option('-c', '--show-class', action='append',
                      help='Show contents of given class')
    # TODO: Add option for other types, and make them work
    parser.add_option('-f', '--show-function', action='append',
                      help='Show details of given function')
    options, _ = parser.parse_args()
    if not options.root_dir:
        # Without this check, the missing directory only surfaces as a
        # traceback from os.path.join() when index.xml is located.
        parser.error('Doxygen XML root directory must be given with -R/--root-dir')

    reporter = Reporter()

    sys.stderr.write('Loading index.xml...\n')
    docset = DocumentationSet(options.root_dir, reporter)
    reporter.write_pending()
    sys.stderr.write('Loading details...\n')
    docset.load_details()
    reporter.write_pending()
    sys.stderr.write('Processing...\n')
    docset.merge_duplicates()
    reporter.write_pending()

    # Each entry pairs the values given for one --show-* option with the
    # lookup method that resolves them; the order determines the order in
    # which the entities are shown.
    # TODO: Replace file names with anonymous_namespace{filename}
    #       (for --show-namespace)
    selections = (
        (options.show_file, docset.get_files),
        (options.show_dir, docset.get_directories),
        (options.show_group, docset.get_groups),
        (options.show_namespace, docset.get_namespaces),
        (options.show_class, docset.get_classes),
        (options.show_function, docset.get_functions),
    )
    objlist = []
    for names, lookup in selections:
        if names:
            objlist.extend(lookup(tuple(names)))
    for obj in objlist:
        obj.show()
1308
# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()