3 # Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
5 # Extract documentation from C++ header files to use it in Python bindings
14 from clang import cindex
15 from clang.cindex import CursorKind
16 from collections import OrderedDict
18 from threading import Thread, Semaphore
19 from multiprocessing import cpu_count
22 CursorKind.TRANSLATION_UNIT,
24 CursorKind.CLASS_DECL,
25 CursorKind.STRUCT_DECL,
27 CursorKind.CLASS_TEMPLATE
31 CursorKind.CLASS_DECL,
32 CursorKind.STRUCT_DECL,
34 CursorKind.ENUM_CONSTANT_DECL,
35 CursorKind.CLASS_TEMPLATE,
36 CursorKind.FUNCTION_DECL,
37 CursorKind.FUNCTION_TEMPLATE,
38 CursorKind.CONVERSION_FUNCTION,
39 CursorKind.CXX_METHOD,
40 CursorKind.CONSTRUCTOR,
45 CursorKind.TRANSLATION_UNIT
49 '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
50 '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=':
51 'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
52 '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>':
53 'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
54 '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/':
55 'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
# Re-order the operator table so that the longest symbols come first.
# sanitize_name() walks this mapping in order and does plain substring
# replacement, so e.g. '<<=' has to be tried before '<<' and '<'.
# (reverse=True keeps the sort stable, so equal-length symbols retain
# their original relative order.)
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(),
           key=lambda entry: len(entry[0]),
           reverse=True))

# Number of slots in the job semaphore: one per CPU reported by
# multiprocessing.cpu_count(). ExtractionThread acquires/releases a slot and
# extract_all() acquires `job_count` slots while waiting for jobs to finish.
job_count = cpu_count()
job_semaphore = Semaphore(value=job_count)
65 class NoFilenamesError(ValueError):
70 return s if isinstance(s, str) else s.decode('utf8')
def sanitize_name(name):
    """Build the ``__doc_``-prefixed identifier for a C++ entity name.

    The transformations are applied in this order:

    1. ``type-parameter-0-N`` placeholders are rewritten to ``TN``.
    2. Every ``operator<symbol>`` occurrence is replaced by
       ``operator_<word>``, using the symbol/word pairs of ``CPP_OPERATORS``
       in that mapping's iteration order.
    3. Everything from the first ``<`` to the last ``>`` on a line is
       removed (the match is greedy).
    4. Each character that is not alphanumeric becomes ``_``.
    5. Runs of underscores collapse to a single one and a trailing
       underscore is dropped.

    Returns the resulting string prefixed with ``__doc_``.
    """
    ident = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)

    # Operators must be spelled out before the '<...>' stripping below,
    # otherwise symbols such as '<' or '>>' would be swallowed by it.
    for symbol, word in CPP_OPERATORS.items():
        ident = ident.replace('operator%s' % symbol, 'operator_%s' % word)

    ident = re.sub('<.*>', '', ident)
    ident = ''.join(c if c.isalnum() else '_' for c in ident)

    # Collapse first, then trim, so at most one trailing '_' can remain
    # for the second substitution to remove.
    ident = re.sub('_+', '_', ident)
    ident = re.sub('_$', '', ident)
    return '__doc_' + ident
83 def process_comment(comment):
86 # Remove C++ comment syntax
87 leading_spaces = float('inf')
88 for s in comment.expandtabs(tabsize=4).splitlines():
90 if s.startswith('/*'):
92 elif s.endswith('*/'):
93 s = s[:-2].rstrip('*')
94 elif s.startswith('///'):
99 leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
102 if leading_spaces != float('inf'):
104 for s in result.splitlines():
105 result2 += s[leading_spaces:] + '\n'
109 cpp_group = '([\w:]+)'
110 param_group = '([\[\w:\]]+)'
113 s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
114 s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
115 s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
116 s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
117 s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
118 s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
119 s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
120 r'\n\n$Parameter ``\2``:\n\n', s)
121 s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
122 r'\n\n$Template parameter ``\2``:\n\n', s)
127 'authors': 'Authors',
128 'copyright': 'Copyright',
133 'extends': 'Extends',
137 s = re.sub(r'\\%s\s*' % in_, r'\n\n$%s:\n\n' % out_, s)
139 s = re.sub(r'\\details\s*', r'\n\n', s)
140 s = re.sub(r'\\brief\s*', r'', s)
141 s = re.sub(r'\\short\s*', r'', s)
142 s = re.sub(r'\\ref\s*', r'', s)
144 s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
145 r"```\n\1\n```\n", s, flags=re.DOTALL)
148 s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
149 s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
150 s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
151 s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
152 s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
153 s = re.sub(r'<li>', r'\n\n* ', s)
154 s = re.sub(r'</?ul>', r'', s)
155 s = re.sub(r'</li>', r'\n\n', s)
157 s = s.replace('``true``', '``True``')
158 s = s.replace('``false``', '``False``')
161 wrapper = textwrap.TextWrapper()
162 wrapper.expand_tabs = True
163 wrapper.replace_whitespace = True
164 wrapper.drop_whitespace = True
166 wrapper.initial_indent = wrapper.subsequent_indent = ''
169 in_code_segment = False
170 for x in re.split(r'(```)', s):
172 if not in_code_segment:
175 result += '\n```\n\n'
176 in_code_segment = not in_code_segment
177 elif in_code_segment:
180 for y in re.split(r'(?: *\n *){2,}', x):
181 wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
182 if len(wrapped) > 0 and wrapped[0] == '$':
183 result += wrapped[1:] + '\n'
184 wrapper.initial_indent = \
185 wrapper.subsequent_indent = ' ' * 4
188 result += wrapped + '\n\n'
189 wrapper.initial_indent = wrapper.subsequent_indent = ''
190 return result.rstrip().lstrip('\n')
193 def extract(filename, node, prefix, output):
194 if not (node.location.file is None or
195 os.path.samefile(d(node.location.file.name), filename)):
197 if node.kind in RECURSE_LIST:
199 if node.kind not in PREFIX_BLACKLIST:
200 if len(sub_prefix) > 0:
202 sub_prefix += d(node.spelling)
203 for i in node.get_children():
204 extract(filename, i, sub_prefix, output)
205 if node.kind in PRINT_LIST:
206 comment = d(node.raw_comment) if node.raw_comment is not None else ''
207 comment = process_comment(comment)
209 if len(sub_prefix) > 0:
211 if len(node.spelling) > 0:
212 name = sanitize_name(sub_prefix + d(node.spelling))
213 output.append((name, filename, comment))
216 class ExtractionThread(Thread):
217 def __init__(self, filename, parameters, output):
218 Thread.__init__(self)
219 self.filename = filename
220 self.parameters = parameters
222 job_semaphore.acquire()
225 print('Processing "%s" ..' % self.filename, file=sys.stderr)
227 index = cindex.Index(
228 cindex.conf.lib.clang_createIndex(False, True))
229 tu = index.parse(self.filename, self.parameters)
230 extract(self.filename, tu.cursor, '', self.output)
232 job_semaphore.release()
239 parameters.extend(['-x', 'c++'])
240 if not any(it.startswith("-std=") for it in args):
241 parameters.append('-std=c++11')
243 if platform.system() == 'Darwin':
244 dev_path = '/Applications/Xcode.app/Contents/Developer/'
245 lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
246 sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
247 libclang = lib_dir + 'libclang.dylib'
249 if os.path.exists(libclang):
250 cindex.Config.set_library_path(os.path.dirname(libclang))
252 if os.path.exists(sdk_dir):
253 sysroot_dir = os.path.join(sdk_dir, next(os.walk(sdk_dir))[1][0])
254 parameters.append('-isysroot')
255 parameters.append(sysroot_dir)
256 elif platform.system() == 'Linux':
257 # clang doesn't find its own base includes by default on Linux,
258 # but different distros install them in different paths.
259 # Try to autodetect, preferring the highest numbered version.
260 def clang_folder_version(d):
261 return [int(ver) for ver in re.findall(r'(?<!lib)(?<!\d)\d+', d)]
262 clang_include_dir = max((
264 for libdir in ['lib64', 'lib', 'lib32']
265 for path in glob('/usr/%s/clang/*/include' % libdir)
266 if os.path.isdir(path)
267 ), default=None, key=clang_folder_version)
268 if clang_include_dir:
269 parameters.extend(['-isystem', clang_include_dir])
272 if item.startswith('-'):
273 parameters.append(item)
275 filenames.append(item)
277 if len(filenames) == 0:
278 raise NoFilenamesError("args parameter did not contain any filenames")
280 return parameters, filenames
283 def extract_all(args):
284 parameters, filenames = read_args(args)
286 for filename in filenames:
287 thr = ExtractionThread(filename, parameters, output)
290 print('Waiting for jobs to finish ..', file=sys.stderr)
291 for i in range(job_count):
292 job_semaphore.acquire()
297 def write_header(comments, out_file=sys.stdout):
299 This file contains docstrings for the Python bindings.
300 Do not edit! These were automatically extracted by mkdoc.py
303 #define __EXPAND(x) x
304 #define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
305 #define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
306 #define __CAT1(a, b) a ## b
307 #define __CAT2(a, b) __CAT1(a, b)
308 #define __DOC1(n1) __doc_##n1
309 #define __DOC2(n1, n2) __doc_##n1##_##n2
310 #define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
311 #define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
312 #define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
313 #define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
314 #define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
315 #define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
317 #if defined(__GNUG__)
318 #pragma GCC diagnostic push
319 #pragma GCC diagnostic ignored "-Wunused-variable"
326 for name, _, comment in list(sorted(comments, key=lambda x: (x[0], x[1]))):
327 if name == name_prev:
329 name = name + "_%i" % name_ctr
333 print('\nstatic const char *%s =%sR"doc(%s)doc";' %
334 (name, '\n' if '\n' in comment else ' ', comment), file=out_file)
337 #if defined(__GNUG__)
338 #pragma GCC diagnostic pop
346 for idx, arg in enumerate(args):
347 if arg.startswith("-o"):
350 out_path = arg[2:] or args.pop(idx)
352 print("-o flag requires an argument")
356 comments = extract_all(args)
360 with open(out_path, 'w') as out_file:
361 write_header(comments, out_file)
363 # In the event of an error, don't leave a partially-written
371 write_header(comments)
374 if __name__ == '__main__':
377 except NoFilenamesError:
378 print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])