python_packaging/src/external/pybind/tools/mkdoc.py

   1 #!/usr/bin/env python3
   2 #
#  Syntax: mkdoc.py [-o <file>] [-I<path> ..] [.. a list of header files ..]
   4 #
   5 #  Extract documentation from C++ header files to use it in Python bindings
   6 #
   7
   8 import os
   9 import sys
  10 import platform
  11 import re
  12 import textwrap
  13
  14 from clang import cindex
  15 from clang.cindex import CursorKind
  16 from collections import OrderedDict
  17 from glob import glob
  18 from threading import Thread, Semaphore
  19 from multiprocessing import cpu_count
  20
# Cursor kinds whose children extract() descends into.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE
]

# Cursor kinds for which extract() records a (name, filename, comment) entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL
]

# Cursor kinds that are recursed into but whose spelling is NOT appended to
# the name prefix handed down to their children.
PREFIX_BLACKLIST = [
    CursorKind.TRANSLATION_UNIT
]
  47
  48 CPP_OPERATORS = {
  49     '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
  50     '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=':
  51     'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
  52     '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>':
  53     'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
  54     '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/':
  55     'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
  56 }
  57
  58 CPP_OPERATORS = OrderedDict(
  59     sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))
  60
# Upper bound on concurrently running extraction jobs: one per CPU reported
# by multiprocessing.cpu_count().
job_count = cpu_count()
# One permit per job slot.  ExtractionThread acquires a permit in its
# constructor and releases it when its run() method finishes.
job_semaphore = Semaphore(job_count)
  63
  64
  65 class NoFilenamesError(ValueError):
  66     pass
  67
  68
  69 def d(s):
  70     return s if isinstance(s, str) else s.decode('utf8')
  71
  72
  73 def sanitize_name(name):
  74     name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
  75     for k, v in CPP_OPERATORS.items():
  76         name = name.replace('operator%s' % k, 'operator_%s' % v)
  77     name = re.sub('<.*>', '', name)
  78     name = ''.join([ch if ch.isalnum() else '_' for ch in name])
  79     name = re.sub('_$', '', re.sub('_+', '_', name))
  80     return '__doc_' + name
  81
  82
  83 def process_comment(comment):
  84     result = ''
  85
  86     # Remove C++ comment syntax
  87     leading_spaces = float('inf')
  88     for s in comment.expandtabs(tabsize=4).splitlines():
  89         s = s.strip()
  90         if s.startswith('/*'):
  91             s = s[2:].lstrip('*')
  92         elif s.endswith('*/'):
  93             s = s[:-2].rstrip('*')
  94         elif s.startswith('///'):
  95             s = s[3:]
  96         if s.startswith('*'):
  97             s = s[1:]
  98         if len(s) > 0:
  99             leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
 100         result += s + '\n'
 101
 102     if leading_spaces != float('inf'):
 103         result2 = ""
 104         for s in result.splitlines():
 105             result2 += s[leading_spaces:] + '\n'
 106         result = result2
 107
 108     # Doxygen tags
 109     cpp_group = '([\w:]+)'
 110     param_group = '([\[\w:\]]+)'
 111
 112     s = result
 113     s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
 114     s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
 115     s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
 116     s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
 117     s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
 118     s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
 119     s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
 120                r'\n\n$Parameter ``\2``:\n\n', s)
 121     s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
 122                r'\n\n$Template parameter ``\2``:\n\n', s)
 123
 124     for in_, out_ in {
 125         'return': 'Returns',
 126         'author': 'Author',
 127         'authors': 'Authors',
 128         'copyright': 'Copyright',
 129         'date': 'Date',
 130         'remark': 'Remark',
 131         'sa': 'See also',
 132         'see': 'See also',
 133         'extends': 'Extends',
 134         'throw': 'Throws',
 135         'throws': 'Throws'
 136     }.items():
 137         s = re.sub(r'\\%s\s*' % in_, r'\n\n$%s:\n\n' % out_, s)
 138
 139     s = re.sub(r'\\details\s*', r'\n\n', s)
 140     s = re.sub(r'\\brief\s*', r'', s)
 141     s = re.sub(r'\\short\s*', r'', s)
 142     s = re.sub(r'\\ref\s*', r'', s)
 143
 144     s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
 145                r"```\n\1\n```\n", s, flags=re.DOTALL)
 146
 147     # HTML/TeX tags
 148     s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
 149     s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
 150     s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
 151     s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
 152     s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
 153     s = re.sub(r'<li>', r'\n\n* ', s)
 154     s = re.sub(r'</?ul>', r'', s)
 155     s = re.sub(r'</li>', r'\n\n', s)
 156
 157     s = s.replace('``true``', '``True``')
 158     s = s.replace('``false``', '``False``')
 159
 160     # Re-flow text
 161     wrapper = textwrap.TextWrapper()
 162     wrapper.expand_tabs = True
 163     wrapper.replace_whitespace = True
 164     wrapper.drop_whitespace = True
 165     wrapper.width = 70
 166     wrapper.initial_indent = wrapper.subsequent_indent = ''
 167
 168     result = ''
 169     in_code_segment = False
 170     for x in re.split(r'(```)', s):
 171         if x == '```':
 172             if not in_code_segment:
 173                 result += '```\n'
 174             else:
 175                 result += '\n```\n\n'
 176             in_code_segment = not in_code_segment
 177         elif in_code_segment:
 178             result += x.strip()
 179         else:
 180             for y in re.split(r'(?: *\n *){2,}', x):
 181                 wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
 182                 if len(wrapped) > 0 and wrapped[0] == '$':
 183                     result += wrapped[1:] + '\n'
 184                     wrapper.initial_indent = \
 185                         wrapper.subsequent_indent = ' ' * 4
 186                 else:
 187                     if len(wrapped) > 0:
 188                         result += wrapped + '\n\n'
 189                     wrapper.initial_indent = wrapper.subsequent_indent = ''
 190     return result.rstrip().lstrip('\n')
 191
 192
 193 def extract(filename, node, prefix, output):
 194     if not (node.location.file is None or
 195             os.path.samefile(d(node.location.file.name), filename)):
 196         return 0
 197     if node.kind in RECURSE_LIST:
 198         sub_prefix = prefix
 199         if node.kind not in PREFIX_BLACKLIST:
 200             if len(sub_prefix) > 0:
 201                 sub_prefix += '_'
 202             sub_prefix += d(node.spelling)
 203         for i in node.get_children():
 204             extract(filename, i, sub_prefix, output)
 205     if node.kind in PRINT_LIST:
 206         comment = d(node.raw_comment) if node.raw_comment is not None else ''
 207         comment = process_comment(comment)
 208         sub_prefix = prefix
 209         if len(sub_prefix) > 0:
 210             sub_prefix += '_'
 211         if len(node.spelling) > 0:
 212             name = sanitize_name(sub_prefix + d(node.spelling))
 213             output.append((name, filename, comment))
 214
 215
 216 class ExtractionThread(Thread):
 217     def __init__(self, filename, parameters, output):
 218         Thread.__init__(self)
 219         self.filename = filename
 220         self.parameters = parameters
 221         self.output = output
 222         job_semaphore.acquire()
 223
 224     def run(self):
 225         print('Processing "%s" ..' % self.filename, file=sys.stderr)
 226         try:
 227             index = cindex.Index(
 228                 cindex.conf.lib.clang_createIndex(False, True))
 229             tu = index.parse(self.filename, self.parameters)
 230             extract(self.filename, tu.cursor, '', self.output)
 231         finally:
 232             job_semaphore.release()
 233
 234
 235 def read_args(args):
 236     parameters = []
 237     filenames = []
 238     if "-x" not in args:
 239         parameters.extend(['-x', 'c++'])
 240     if not any(it.startswith("-std=") for it in args):
 241         parameters.append('-std=c++11')
 242
 243     if platform.system() == 'Darwin':
 244         dev_path = '/Applications/Xcode.app/Contents/Developer/'
 245         lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
 246         sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
 247         libclang = lib_dir + 'libclang.dylib'
 248
 249         if os.path.exists(libclang):
 250             cindex.Config.set_library_path(os.path.dirname(libclang))
 251
 252         if os.path.exists(sdk_dir):
 253             sysroot_dir = os.path.join(sdk_dir, next(os.walk(sdk_dir))[1][0])
 254             parameters.append('-isysroot')
 255             parameters.append(sysroot_dir)
 256     elif platform.system() == 'Linux':
 257         # clang doesn't find its own base includes by default on Linux,
 258         # but different distros install them in different paths.
 259         # Try to autodetect, preferring the highest numbered version.
 260         def clang_folder_version(d):
 261             return [int(ver) for ver in re.findall(r'(?<!lib)(?<!\d)\d+', d)]
 262         clang_include_dir = max((
 263             path
 264             for libdir in ['lib64', 'lib', 'lib32']
 265             for path in glob('/usr/%s/clang/*/include' % libdir)
 266             if os.path.isdir(path)
 267         ), default=None, key=clang_folder_version)
 268         if clang_include_dir:
 269             parameters.extend(['-isystem', clang_include_dir])
 270
 271     for item in args:
 272         if item.startswith('-'):
 273             parameters.append(item)
 274         else:
 275             filenames.append(item)
 276
 277     if len(filenames) == 0:
 278         raise NoFilenamesError("args parameter did not contain any filenames")
 279
 280     return parameters, filenames
 281
 282
 283 def extract_all(args):
 284     parameters, filenames = read_args(args)
 285     output = []
 286     for filename in filenames:
 287         thr = ExtractionThread(filename, parameters, output)
 288         thr.start()
 289
 290     print('Waiting for jobs to finish ..', file=sys.stderr)
 291     for i in range(job_count):
 292         job_semaphore.acquire()
 293
 294     return output
 295
 296
 297 def write_header(comments, out_file=sys.stdout):
 298     print('''/*
 299   This file contains docstrings for the Python bindings.
 300   Do not edit! These were automatically extracted by mkdoc.py
 301  */
 302
 303 #define __EXPAND(x)                                      x
 304 #define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
 305 #define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
 306 #define __CAT1(a, b)                                     a ## b
 307 #define __CAT2(a, b)                                     __CAT1(a, b)
 308 #define __DOC1(n1)                                       __doc_##n1
 309 #define __DOC2(n1, n2)                                   __doc_##n1##_##n2
 310 #define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
 311 #define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
 312 #define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
 313 #define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
 314 #define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
 315 #define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
 316
 317 #if defined(__GNUG__)
 318 #pragma GCC diagnostic push
 319 #pragma GCC diagnostic ignored "-Wunused-variable"
 320 #endif
 321 ''', file=out_file)
 322
 323
 324     name_ctr = 1
 325     name_prev = None
 326     for name, _, comment in list(sorted(comments, key=lambda x: (x[0], x[1]))):
 327         if name == name_prev:
 328             name_ctr += 1
 329             name = name + "_%i" % name_ctr
 330         else:
 331             name_prev = name
 332             name_ctr = 1
 333         print('\nstatic const char *%s =%sR"doc(%s)doc";' %
 334               (name, '\n' if '\n' in comment else ' ', comment), file=out_file)
 335
 336     print('''
 337 #if defined(__GNUG__)
 338 #pragma GCC diagnostic pop
 339 #endif
 340 ''', file=out_file)
 341
 342
 343 def mkdoc(args):
 344     args = list(args)
 345     out_path = None
 346     for idx, arg in enumerate(args):
 347         if arg.startswith("-o"):
 348             args.remove(arg)
 349             try:
 350                 out_path = arg[2:] or args.pop(idx)
 351             except IndexError:
 352                 print("-o flag requires an argument")
 353                 exit(-1)
 354             break
 355
 356     comments = extract_all(args)
 357
 358     if out_path:
 359         try:
 360             with open(out_path, 'w') as out_file:
 361                 write_header(comments, out_file)
 362         except:
 363             # In the event of an error, don't leave a partially-written
 364             # output file.
 365             try:
 366                 os.unlink(out_path)
 367             except:
 368                 pass
 369             raise
 370     else:
 371         write_header(comments)
 372
 373
 374 if __name__ == '__main__':
 375     try:
 376         mkdoc(sys.argv[1:])
 377     except NoFilenamesError:
 378         print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
 379         exit(-1)