]>
Commit | Line | Data |
---|---|---|
f329fa92 | 1 | #!/usr/bin/env python |
2 | ||
06ccae0f | 3 | ## @package thtml2doxy_clang |
4 | # Translates THtml C++ comments to Doxygen using libclang as parser. | |
5 | # | |
6 | # This code relies on Python bindings for libclang: libclang's interface is pretty unstable, and | |
7 | # its Python bindings are unstable as well. | |
8 | # | |
9 | # AST (Abstract Syntax Tree) traversal is performed entirely using libclang as a C++ parser, | |
10 | # instead of attempting to write a parser ourselves. | |
11 | # | |
12 | # This code (especially AST traversal) was inspired by: | |
13 | # | |
14 | # - [Implementing a code generator with libclang](http://szelei.me/code-generator/) | |
15 | # (this refers to API calls used here) | |
16 | # - [Parsing C++ in Python with Clang](http://eli.thegreenplace.net/2011/07/03/parsing-c-in-python-with-clang) | |
17 | # (outdated, API calls described there do not work anymore, but useful to understand some basic | |
18 | # concepts) | |
19 | # | |
20 | # Usage: | |
21 | # | |
22 | # `thtml2doxy_clang [--stdout|-o] [-d] [--debug=DEBUG_LEVEL] file1 [file2 [file3...]]` | |
23 | # | |
24 | # Parameters: | |
25 | # | |
26 | # - `--stdout|-o`: output all on standard output instead of writing files in place | |
27 | # - `-d`: enable debug mode (very verbose output) | |
28 | # - `--debug=DEBUG_LEVEL`: set debug level to one of `DEBUG`, `INFO`, `WARNING`, `ERROR`, | |
29 | # `CRITICAL` | |
30 | # | |
31 | # @author Dario Berzano, CERN | |
32 | # @date 2014-12-05 | |
33 | ||
34 | ||
f329fa92 | 35 | import sys |
36 | import os | |
37 | import re | |
06ccae0f | 38 | import logging |
39 | import getopt | |
ee1793c7 | 40 | import hashlib |
06ccae0f | 41 | import clang.cindex |
42 | ||
43 | ||
## Bare-bones colored output for the terminal.
#
#  A `str` subclass: every color method returns the text enclosed between an ANSI color escape
#  sequence and the ANSI reset sequence.
class Colt(str):

    ## Encloses the text between the given escape sequence and the reset sequence.
    #
    #  @param c  Escape sequence to put in front of the text
    #
    #  @return The decorated text
    def color(self, c):
        return ''.join((c, self, '\033[m'))

    ## @return The text colored in red
    def red(self):
        return self.color('\033[31m')

    ## @return The text colored in green
    def green(self):
        return self.color('\033[32m')

    ## @return The text colored in yellow
    def yellow(self):
        return self.color('\033[33m')

    ## @return The text colored in blue
    def blue(self):
        return self.color('\033[34m')

    ## @return The text colored in magenta
    def magenta(self):
        return self.color('\033[35m')

    ## @return The text colored in cyan
    def cyan(self):
        return self.color('\033[36m')
67 | ||
f329fa92 | 68 | |
## A block of comment lines together with its position in the source file.
class Comment:

    ## Constructor.
    #
    #  @param lines       List of comment lines
    #  @param first_line  Line where the comment starts (must be greater than zero)
    #  @param first_col   Column where the comment starts
    #  @param last_line   Line where the comment ends (must not precede `first_line`)
    #  @param last_col    Column where the comment ends
    #  @param indent      Indentation stored along with the comment
    #  @param func        Name of the entity the comment refers to
    def __init__(self, lines, first_line, first_col, last_line, last_col, indent, func):
        assert 0 < first_line <= last_line, 'Wrong line numbers'
        self.func = func
        self.indent = indent
        self.lines = lines
        self.first_line, self.first_col = first_line, first_col
        self.last_line, self.last_col = last_line, last_col

    ## Tells whether the given line number falls inside this comment (boundaries included).
    #
    #  @param line  Line number to check
    def has_comment(self, line):
        return self.first_line <= line <= self.last_line

    ## String representation, used for debug purposes.
    def __str__(self):
        fields = (
            self.func,
            self.first_line, self.first_col,
            self.last_line, self.last_col,
            self.lines,
        )
        return "<Comment for %s: [%d,%d:%d,%d] %s>" % fields
87 | ||
88 | ||
## A single-line comment attached to a data member.
class MemberComment:

    ## Constructor.
    #
    #  @param text          Text of the comment (stored as the only element of `lines`)
    #  @param comment_flag  One of `!`, `||`, `->`, or `None` if no flag is present
    #  @param array_size    Array size specification, or `None`
    #  @param first_line    Line of the comment (must be greater than zero)
    #  @param first_col     Column where the comment starts
    #  @param func          Name of the data member
    def __init__(self, text, comment_flag, array_size, first_line, first_col, func):
        assert first_line > 0, 'Wrong line number'
        assert comment_flag is None or comment_flag in ( '!', '||', '->' )
        self.func = func
        self.first_line = first_line
        self.first_col = first_col
        self.array_size = array_size
        self.comment_flag = comment_flag
        self.lines = [ text ]

    ## True if the comment carries the `!` flag.
    def is_transient(self):
        return self.comment_flag == '!'

    ## True if the comment carries the `||` flag.
    def is_dontsplit(self):
        return self.comment_flag == '||'

    ## True if the comment carries the `->` flag.
    def is_ptr(self):
        return self.comment_flag == '->'

    ## Tells whether the comment is on the given line.
    #
    #  @param line  Line number to check
    def has_comment(self, line):
        return self.first_line == line

    ## String representation, used for debug purposes.
    def __str__(self):

        # The first flag test that succeeds determines the label
        flag_label = ''
        for flag_is_set, label in (
                (self.is_transient, '!transient! '),
                (self.is_dontsplit, '!dontsplit! '),
                (self.is_ptr, '!ptr! ')):
            if flag_is_set():
                flag_label = label
                break

        size_label = ''
        if self.array_size is not None:
            size_label = '[%s] ' % self.array_size

        return "<MemberComment for %s: [%d,%d] %s%s%s>" % (
            self.func, self.first_line, self.first_col, flag_label, size_label, self.lines[0])
131 | ||
132 | ||
## Placeholder comment marking a range of lines to be removed.
#
#  Only the line range and a fixed `func` label are stored: the constructor of the base class is
#  not invoked, so the remaining attributes of a Comment are not set on instances of this class.
class RemoveComment(Comment):

    ## Constructor.
    #
    #  @param first_line  First line of the range (must be greater than zero)
    #  @param last_line   Last line of the range (must not precede `first_line`)
    def __init__(self, first_line, last_line):
        assert 0 < first_line <= last_line, 'Wrong line numbers'
        self.func = '<remove>'
        self.first_line = first_line
        self.last_line = last_line

    ## String representation, used for debug purposes.
    def __str__(self):
        line_range = (self.first_line, self.last_line)
        return "<RemoveComment: [%d,%d]>" % line_range
144 | ||
145 | ||
## Parses method comments.
#
#  Walks the tokens of a method looking for the first comment placed at the beginning of its body
#  (method -> compound statement -> comment). Consecutive single-line comments are merged in a
#  single block; a multi-line comment is taken alone. At most one Comment is appended.
#
#  @param cursor   Current libclang parser cursor
#  @param comments Array of comments: new ones will be appended there
def comment_method(cursor, comments):

    inside_body = False
    awaiting_comment = False
    must_emit = False

    raw_lines = []
    func_name = cursor.spelling or cursor.displayname
    first_line = -1
    last_line = -1
    first_col = -1
    last_col = -1
    min_indent = -1

    for token in cursor.get_tokens():

        # Track entering and leaving the compound statement
        if token.cursor.kind == clang.cindex.CursorKind.COMPOUND_STMT:
            if not inside_body:
                inside_body = True
                awaiting_comment = True
                last_line = -1
        elif inside_body:
            inside_body = False
            must_emit = True

        if inside_body and awaiting_comment:

            extent = token.extent
            tok_first = extent.start.line
            tok_last = extent.end.line

            if token.kind == clang.cindex.TokenKind.PUNCTUATION and token.spelling == '{':
                # Opening brace: the comment, if any, comes next
                pass

            elif token.kind == clang.cindex.TokenKind.COMMENT and \
                    (last_line == -1 or (tok_first == last_line+1 and tok_last == tok_first)):

                # Either the first comment token, or a single-line comment immediately following
                # the previous one
                last_line = tok_last
                last_col = extent.end.column

                # Keep the smallest indentation found in the block
                tok_indent = extent.start.column-1
                if min_indent == -1 or tok_indent < min_indent:
                    min_indent = tok_indent

                if first_line == -1:
                    first_line = tok_first
                    first_col = extent.start.column

                raw_lines.extend( token.spelling.split('\n') )

                # A multi-line comment comes as a single token: nothing else is expected after it
                if tok_last > tok_first:
                    must_emit = True
                    awaiting_comment = False

            else:
                # Anything else terminates the search
                must_emit = True
                awaiting_comment = False

        if not must_emit:
            continue

        if first_line > 0:

            cleaned = refactor_comment( raw_lines, infilename=str(cursor.location.file) )

            if len(cleaned) > 0:
                logging.debug("Comment found for function %s" % Colt(func_name).magenta())
            else:
                # Nothing left after the cleanup: replace the comment with a single empty line
                logging.debug('Empty comment found for function %s: collapsing' % Colt(func_name).magenta())
                cleaned = ['']

            comments.append( Comment(cleaned, first_line, first_col, last_line, last_col, min_indent, func_name) )

        else:
            logging.warning('No comment found for function %s' % Colt(func_name).magenta())

        # Only the first block is considered: stop here
        break
240 | ||
241 | ||
## Parses comments to class data members.
#
#  The line where the data member is declared is read directly from the source file and searched
#  for a trailing comment (`//`, `///`, `//!`, `//||`, `//->`, optionally followed by an array size
#  like `[n]`). If found, a MemberComment is appended. For members with an array size whose
#  previous line is a `///` comment, the text is taken from that line, and a RemoveComment for it
#  is appended as well.
#
#  @param cursor   Current libclang parser cursor
#  @param comments Array of comments: new ones will be appended there
#
#  @exception AssertionError  If the line of the data member does not exist in the file
def comment_datamember(cursor, comments):

    # Note: libclang 3.5 seems to have problems parsing a certain type of FIELD_DECL, so we revert
    # to a partial manual parsing. When parsing fails, the cursor's "extent" is not set properly,
    # returning a line range 0-0. We therefore make the not-so-absurd assumption that the datamember
    # definition is fully on one line, and we take the line number from cursor.location.

    line_num = cursor.location.line
    raw = None
    prev = None
    found = False

    # Scan the file up to the wanted line: current line saved in "raw", previous one in "prev"
    with open(str(cursor.location.file)) as fp:
        for cur_line, raw in enumerate(fp, 1):
            if cur_line == line_num:
                found = True
                break
            prev = raw

    # The message must contain a placeholder for the file name, otherwise formatting it raises a
    # TypeError hiding the assertion
    assert found, 'A line that should exist was not found in file %s' % cursor.location.file

    recomm = r'(//(!|\|\||->)|///?)(\[([0-9,]+)\])?<?\s*(.*?)\s*$'
    recomm_prevline = r'^\s*///\s*(.*?)\s*$'

    mcomm = re.search(recomm, raw)
    if mcomm is None:
        # No comment on this line: nothing to do
        return

    member_name = cursor.spelling
    comment_flag = mcomm.group(2)
    array_size = mcomm.group(4)
    text = mcomm.group(5)

    # Columns are counted from one
    col_num = mcomm.start()+1

    if array_size is not None and prev is not None:
        # ROOT arrays with comments already converted to Doxygen have the member description on the
        # previous line
        mcomm_prevline = re.search(recomm_prevline, prev)
        if mcomm_prevline:
            text = mcomm_prevline.group(1)
            comments.append(RemoveComment(line_num-1, line_num-1))

    logging.debug('Comment found for member %s' % Colt(member_name).magenta())

    comments.append( MemberComment(
        text,
        comment_flag,
        array_size,
        line_num,
        col_num,
        member_name ))
300 | ||
06ccae0f | 301 | |
## Parses class description (beginning of file).
#
#  The clang parser does not work in this case so we do it manually, but it is very simple: we keep
#  the first consecutive sequence of single-line comments (//) we find - provided that it occurs
#  before any other comment found so far in the file (the comments array is inspected to ensure
#  this).
#
#  Multi-line comments (/* ... */) are not considered as they are commonly used to display
#  copyright notice.
#
#  Lines carrying a class name, an author or a date are not copied verbatim: their content is
#  re-emitted as `\class`, `\author` and `\date` lines. If no class name is given, it is guessed
#  from the file name.
#
#  @param filename Name of the current file
#  @param comments Array of comments: new ones will be appended there
def comment_classdesc(filename, comments):

    recomm = r'^\s*///?(\s*.*?)\s*/*\s*$'
    reclass_doxy = r'(?i)^\s*\\class:?\s*(.*?)\s*$'
    reauthor = r'(?i)^\s*\\?authors?:?\s*(.*?)\s*(,?\s*([0-9./-]+))?\s*$'
    redate = r'(?i)^\s*\\?date:?\s*([0-9./-]+)\s*$'

    class_name_doxy = None
    author = None
    date = None

    comment_lines = []
    start_line = -1
    end_line = -1

    with open(filename, 'r') as fp:

        for line_num, raw in enumerate(fp, 1):

            # Empty lines inside the block are skipped
            if start_line > 0 and raw.strip() == '':
                continue

            mcomm = re.search(recomm, strip_html(raw))

            if not mcomm:
                if start_line > 0:
                    # First non-comment line after the block: we are done
                    break
                continue

            if start_line == -1:
                # First line. If it overlaps with other comments there is no need to go further
                if any(c.has_comment(line_num) for c in comments):
                    break
                start_line = line_num

            end_line = line_num
            content = mcomm.group(1)

            mclass_doxy = re.search(reclass_doxy, content)
            if mclass_doxy:
                class_name_doxy = mclass_doxy.group(1)
                continue

            mauthor = re.search(reauthor, content)
            if mauthor:
                author = mauthor.group(1)
                if date is None:
                    # Date specified in the standalone \date field has priority
                    date = mauthor.group(3)
                continue

            mdate = re.search(redate, content)
            if mdate:
                date = mdate.group(1)
                continue

            # An ordinary line of text
            comment_lines.append(content)

    if class_name_doxy is None:

        # No \class specified: guess it from file name
        mclass = re.search(r'^(.*/)?(.*?)(\..*)?$', filename)
        if mclass:
            class_name_doxy = mclass.group(2)
        else:
            assert False, 'Regexp unable to extract classname from file'

    if start_line <= 0:
        logging.warning('No comment found for class %s' % Colt(class_name_doxy).magenta())
        return

    # \class specifier goes first; author and date, if any, go last after an empty line
    comment_lines.insert(0, '\\class ' + class_name_doxy)
    comment_lines.append('')

    if author is not None:
        comment_lines.append('\\author ' + author)

    if date is not None:
        comment_lines.append('\\date ' + date)

    comment_lines = refactor_comment(comment_lines, do_strip_html=False, infilename=filename)
    logging.debug('Comment found for class %s' % Colt(class_name_doxy).magenta())
    comments.append(Comment(comment_lines, start_line, 1, end_line, 1, 0, class_name_doxy))
06ccae0f | 425 | |
426 | ||
## Traverse the AST recursively starting from the current cursor.
#
#  Methods, constructors and destructors are passed to comment_method(), data member declarations
#  to comment_datamember(). Once the whole tree has been visited (i.e. at recursion depth zero),
#  comment_classdesc() is invoked on the file.
#
#  @param cursor    A Clang parser cursor
#  @param filename  Name of the current file
#  @param comments  Array of comments: new ones will be appended there
#  @param recursion Current recursion depth
def traverse_ast(cursor, filename, comments, recursion=0):

    # libclang traverses included files as well: we do not want this behavior
    if cursor.location.file is not None and str(cursor.location.file) != filename:
        logging.debug("Skipping processing of included %s" % cursor.location.file)
        return

    text = cursor.spelling or cursor.displayname

    # Keep only what follows the first dot in the kind's string representation
    kind_repr = str(cursor.kind)
    kind = kind_repr[kind_repr.index('.')+1:]

    indent = ' ' * recursion

    method_kinds = (
        clang.cindex.CursorKind.CXX_METHOD,
        clang.cindex.CursorKind.CONSTRUCTOR,
        clang.cindex.CursorKind.DESTRUCTOR,
    )

    if cursor.kind in method_kinds:

        # cursor ran into a C++ method
        logging.debug( "%5d %s%s(%s)" % (cursor.location.line, indent, Colt(kind).magenta(), Colt(text).blue()) )
        comment_method(cursor, comments)

    elif cursor.kind == clang.cindex.CursorKind.FIELD_DECL:

        # cursor ran into a data member declaration
        logging.debug( "%5d %s%s(%s)" % (cursor.location.line, indent, Colt(kind).magenta(), Colt(text).blue()) )
        comment_datamember(cursor, comments)

    else:

        logging.debug( "%5d %s%s(%s)" % (cursor.location.line, indent, kind, text) )

    for child_cursor in cursor.get_children():
        traverse_ast(child_cursor, filename, comments, recursion+1)

    # Back at the top level: take care of the class description
    if recursion == 0:
        comment_classdesc(filename, comments)
468 | ||
469 | ||
## Strip some HTML tags from the given string. Returns clean string.
#
#  Opening, closing and self-closing forms of the `P` and `BR` tags are removed, regardless of
#  their case.
#
#  @param s Input string
#
#  @return The input string without the tags
def strip_html(s):
    tags = re.compile(r'(?i)</?(P|BR)/?>')
    return tags.sub('', s)
476 | ||
477 | ||
## Remove garbage from comments and convert special tags from THtml to Doxygen.
#
#  For every line: comment markers are removed; empty lines and "garbage" lines (made only of
#  spaces and `*`, `=`, `_`, `#`, `-` characters, or Begin_Html/End_Html tags) are collapsed into
#  a single empty line, and dropped if they come before any text; ROOT LaTeX
#  (BEGIN_LATEX/END_LATEX) is turned into Doxygen formulas; `<pre>` tags alone on a line become
#  `~~~`; lines between BEGIN_MACRO and END_MACRO are handed to write_macro() and replaced by a
#  link to the image.
#
#  @param comment        An array containing the lines of the original comment
#  @param do_strip_html  If True, strip_html() is applied to each line first
#  @param infilename     Name of the source file, passed to write_macro()
#
#  @return A new array with the lines of the converted comment
def refactor_comment(comment, do_strip_html=True, infilename=None):

    recomm = re.compile(r'^(/{2,}|/\*)? ?(\s*.*?)\s*((/{2,})?\s*|\*/)$')

    # Note: the inline flag must come first (it is an error elsewhere on recent Python versions),
    # and the dash must be last in the character class: "=-_" would be the range from "=" to "_",
    # which includes all letters, and single-word lines would be discarded as garbage
    regarbage = re.compile(r'(?i)^\s*([\s*=_#-]+|(Begin|End)_Html)\s*$')

    # Support for LaTeX blocks spanning on multiple lines
    relatex = re.compile(r'(?i)^((.*?)\s+)?(BEGIN|END)_LATEX([.,;:\s]+.*)?$')
    in_latex = False
    latex_block = False

    # Support for LaTeX blocks on a single line
    reinline_latex = re.compile(r'(?i)(.*)BEGIN_LATEX\s+(.*?)\s+END_LATEX(.*)$')

    # Match <pre> (to turn it into the ~~~ Markdown syntax)
    reblock = re.compile(r'(?i)^(\s*)</?PRE>\s*$')

    # Macro blocks for pictures generation
    in_macro = False
    current_macro = []
    remacro = re.compile(r'(?i)^\s*(BEGIN|END)_MACRO(\((.*?)\))?\s*$')

    new_comment = []
    insert_blank = False
    wait_first_non_blank = True

    for line_comment in comment:

        # Check if we are in a macro block
        mmacro = remacro.search(line_comment)
        if mmacro:
            if in_macro:
                in_macro = False

                # Dump macro
                outimg = write_macro(infilename, current_macro) + '.png'
                current_macro = []

                # Insert image
                new_comment.append( '![Picture from ROOT macro](%s)' % (outimg) )

                logging.debug( 'Found macro for generating image %s' % Colt(outimg).magenta() )

            else:
                in_macro = True

            continue

        elif in_macro:
            current_macro.append( line_comment )
            continue

        # Strip some HTML tags
        if do_strip_html:
            line_comment = strip_html(line_comment)

        mcomm = recomm.search(line_comment)
        assert mcomm, 'Comment regexp does not match'

        new_line_comment = mcomm.group(2)

        if new_line_comment == '' or regarbage.search(new_line_comment) is not None:
            # Blank line: emitted later, only if some text follows
            insert_blank = True
            continue

        if insert_blank and not wait_first_non_blank:
            new_comment.append('')
        insert_blank = False
        wait_first_non_blank = False

        # Postprocessing: LaTeX formulas in ROOT format
        # Marked by BEGIN_LATEX ... END_LATEX and they use # in place of \
        # There can be several ROOT LaTeX formulas per line
        while True:
            minline_latex = reinline_latex.search(new_line_comment)
            if not minline_latex:
                break
            new_line_comment = '%s\\f$%s\\f$%s' % \
                ( minline_latex.group(1), minline_latex.group(2).replace('#', '\\'),
                  minline_latex.group(3) )

        # ROOT LaTeX: do we have a Begin/End_LaTeX block?
        # Note: the presence of LaTeX "closures" does not exclude the possibility to have a begin
        # block here left without a corresponding ending block
        mlatex = relatex.search(new_line_comment)
        if mlatex:

            # before and after parts have been already stripped
            l_before = mlatex.group(2)
            l_after = mlatex.group(4)
            is_begin = mlatex.group(3).upper() == 'BEGIN'  # if not, END

            if l_before is None:
                l_before = ''
            if l_after is None:
                l_after = ''

            if is_begin:

                # Begin of LaTeX part
                in_latex = True

                if l_before == '' and l_after == '':
                    # Opening tag alone: mark the beginning of a block: \f[ ... \f]
                    latex_block = True
                    new_comment.append( '\\f[' )
                else:
                    # Mark the beginning of inline: \f$ ... \f$
                    latex_block = False
                    new_comment.append(
                        '%s \\f$%s' % ( l_before, l_after.replace('#', '\\') )
                    )

            else:

                # End of LaTeX part
                in_latex = False

                if latex_block:
                    # Closing a LaTeX block
                    if l_before != '':
                        new_comment.append( l_before.replace('#', '\\') )
                    new_comment.append( '\\f]' )
                    if l_after != '':
                        new_comment.append( l_after )
                else:
                    # Closing a LaTeX inline
                    new_comment.append(
                        '%s\\f$%s' % ( l_before.replace('#', '\\'), l_after )
                    )

            # Prevent appending lines (we have already done that)
            new_line_comment = None

        # If we are not in a LaTeX block, look for <pre> tags and transform them into Doxygen code
        # blocks (using ~~~ ... ~~~). Only <pre> tags on a single line are supported
        if new_line_comment is not None and not in_latex:
            mblock = reblock.search(new_line_comment)
            if mblock:
                new_comment.append( mblock.group(1)+'~~~' )
                new_line_comment = None

        if new_line_comment is not None:
            if in_latex:
                new_line_comment = new_line_comment.replace('#', '\\')
            new_comment.append( new_line_comment )

    return new_comment
635 | ||
636 | ||
## Dumps an image-generating macro to the correct place. Returns a string with the image path,
#  without the extension.
#
#  The macro is written in the `imgdoc` directory, next to the source file. Its name is made of
#  the source file name (with dots turned into underscores) followed by the first seven digits
#  of the SHA-1 hash of the macro. Errors from the file system are not caught.
#
#  @param infilename   File name of the source file
#  @param macro_lines  Array of macro lines
#
#  @return Path of the macro, without the `.C` extension
def write_macro(infilename, macro_lines):

    # Calculate hash. The hash object only accepts bytes: text lines are encoded first (this is
    # needed on Python 3, and leaves byte strings untouched)
    digh = hashlib.sha1()
    for l in macro_lines:
        digh.update(l if isinstance(l, bytes) else l.encode('utf-8'))
        digh.update(b'\n')
    short_digest = digh.hexdigest()[0:7]

    # Joining the paths (instead of gluing them with a slash) keeps the output directory relative
    # when the source file name has no directory part; it would end up under the root otherwise
    outdir = os.path.join(os.path.dirname(infilename), 'imgdoc')
    outprefix = os.path.join(
        outdir,
        '%s_%s' % ( os.path.basename(infilename).replace('.', '_'), short_digest )
    )
    outmacro = '%s.C' % outprefix

    # Make directory
    if not os.path.isdir(outdir):
        # do not catch: let everything die on error
        logging.debug('Creating directory %s' % Colt(outdir).magenta())
        os.mkdir(outdir)

    # Create file (do not catch errors either)
    with open(outmacro, 'w') as omfp:
        logging.debug('Writing macro %s' % Colt(outmacro).magenta())
        for l in macro_lines:
            omfp.write(l)
            omfp.write('\n')

    return outprefix
673 | ||
674 | ||
## Rewrites all comments from the given file handler.
#
#  Lines not covered by any comment are copied as they are. Data member comments are rewritten
#  in place; lines covered by a RemoveComment are dropped; a comment block is dropped too, and
#  its converted lines are written out as `///` lines (followed by an empty line) as soon as
#  the first line not covered by any comment is found.
#
#  @param fhin     The file handler to read from
#  @param fhout    The file handler to write to
#  @param comments Array of comments
def rewrite_comments(fhin, fhout, comments):

    inside_block = False
    drop_if_empty = False
    comm = None

    rindent = r'^(\s*)'

    for line_num, line in enumerate(fhin, 1):

        # Find current comment: if more than one covers this line, the last one is used
        prev_comm = comm
        comm = None
        for candidate in comments:
            if candidate.has_comment(line_num):
                comm = candidate

        if not comm:

            if inside_block:

                # We have just exited a comment block of type Comment: dump the revamped comment
                inside_block = False
                text_indent = ' ' * prev_comm.indent
                for lc in prev_comm.lines:
                    fhout.write( "%s/// %s\n" % (text_indent, lc) )
                fhout.write('\n')

                # An empty line has just been written: do not write another one right after
                drop_if_empty = True

            line_out = line.rstrip('\n')
            if drop_if_empty:
                drop_if_empty = False
                if line_out.strip() == '':
                    continue
            fhout.write( line_out + '\n' )
            continue

        if isinstance(comm, MemberComment):

            # Part of the line preceding the comment
            non_comment = line[ :comm.first_col-1 ]

            if comm.array_size is not None or comm.is_dontsplit() or comm.is_ptr():

                # This is a special case: comment will be split in two lines: one before the comment
                # for Doxygen as "member description", and the other right after the comment on the
                # same line to be parsed by ROOT's C++ parser

                # Keep indent on the generated line of comment before member definition
                mindent = re.search(rindent, line)

                # Comment flag and array size, if any
                cflag = comm.comment_flag if comm.comment_flag is not None else ''
                asize = ('[%s]' % comm.array_size) if comm.array_size is not None else ''

                # Write on two lines
                fhout.write('%s/// %s\n%s//%s%s\n' % (
                    mindent.group(1),
                    comm.lines[0],
                    non_comment,
                    cflag,
                    asize
                ))

            else:

                # Single-line comments with the "transient" flag can be kept on one line in a way
                # that they are correctly interpreted by both ROOT and Doxygen
                tt = '!' if comm.is_transient() else '/'
                fhout.write('%s//%s< %s\n' % (non_comment, tt, comm.lines[0]))

        elif isinstance(comm, RemoveComment):
            # Do nothing: just skip line
            pass

        elif prev_comm is None:

            # Beginning of a new comment block of type Comment
            inside_block = True

            # Extract the non-comment part and print it if it exists
            non_comment = line[ :comm.first_col-1 ].rstrip()
            if non_comment != '':
                fhout.write( non_comment + '\n' )
789 | ||
f329fa92 | 790 | |
## The main function.
#
#  Parses the command line, loads libclang from a list of known locations, then converts every
#  input file: either in place (passing through a temporary backup file) or on standard output.
#
#  @param argv  Command-line arguments, without the program name
#
#  @return The executable's return value: 0 at the end of the run, 1 on invalid arguments or if
#          libclang cannot be found, 2 if an include directory does not exist
def main(argv):

    # Setup logging on stderr
    log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format='%(levelname)-8s %(funcName)-20s %(message)s',
        stream=sys.stderr
    )

    # Parse command-line options
    output_on_stdout = False
    include_flags = []
    try:
        opts, args = getopt.getopt( argv, 'odI:', [ 'debug=', 'stdout' ] )
        for opt_name, opt_value in opts:
            if opt_name == '--debug':
                log_level = getattr( logging, opt_value.upper(), None )
                if not isinstance(log_level, int):
                    raise getopt.GetoptError('log level must be one of: DEBUG, INFO, WARNING, ERROR, CRITICAL')
            elif opt_name == '-d':
                log_level = logging.DEBUG
            elif opt_name in ( '-o', '--stdout' ):
                output_on_stdout = True
            elif opt_name == '-I':
                if not os.path.isdir(opt_value):
                    logging.fatal('Include directory not found: %s' % Colt(opt_value).magenta())
                    return 2
                include_flags.extend( [ '-I', opt_value ] )
            else:
                assert False, 'Unhandled argument'
    except getopt.GetoptError as e:
        logging.fatal('Invalid arguments: %s' % e)
        return 1

    logging.getLogger('').setLevel(log_level)

    # Attempt to load libclang from a list of known locations: the first one found is used
    libclang_locations = [
        '/usr/lib/llvm-3.5/lib/libclang.so.1',
        '/usr/lib/libclang.so',
        '/Library/Developer/CommandLineTools/usr/lib/libclang.dylib'
    ]

    for lib in libclang_locations:
        if os.path.isfile(lib):
            clang.cindex.Config.set_library_file(lib)
            break
    else:
        logging.fatal('Cannot find libclang')
        return 1

    # Loop over all files
    for fn in args:

        logging.info('Input file: %s' % Colt(fn).magenta())
        index = clang.cindex.Index.create()
        clang_args = [ '-x', 'c++' ] + include_flags
        translation_unit = index.parse(fn, args=clang_args)

        comments = []
        traverse_ast( translation_unit.cursor, fn, comments )

        # Dump what has been found (debug only)
        for c in comments:

            logging.debug("Comment found for entity %s:" % Colt(c.func).magenta())

            if isinstance(c, MemberComment):

                if c.is_transient():
                    flag_text = Colt('transient ').yellow()
                elif c.is_dontsplit():
                    flag_text = Colt('dontsplit ').yellow()
                elif c.is_ptr():
                    flag_text = Colt('ptr ').yellow()
                else:
                    flag_text = ''

                array_text = ''
                if c.array_size is not None:
                    array_text = Colt('arraysize=%s ' % c.array_size).yellow()

                position = Colt("[%d,%d]" % (c.first_line, c.first_col)).green()
                logging.debug(
                    "%s %s%s{%s}" % (position, flag_text, array_text, Colt(c.lines[0]).cyan())
                )

            elif isinstance(c, RemoveComment):

                logging.debug( Colt('[%d,%d]' % (c.first_line, c.last_line)).green() )

            else:

                position = Colt("[%d,%d:%d,%d] " % (c.first_line, c.first_col, c.last_line, c.last_col)).green()
                for l in c.lines:
                    logging.debug( position + "{%s}" % Colt(l).cyan() )

        try:

            if output_on_stdout:
                with open(fn, 'r') as fhin:
                    rewrite_comments( fhin, sys.stdout, comments )
            else:
                # The original is moved to a backup, which is removed once the new file is written
                fn_back = fn + '.thtml2doxy_backup'
                os.rename( fn, fn_back )

                with open(fn_back, 'r') as fhin, open(fn, 'w') as fhout:
                    rewrite_comments( fhin, fhout, comments )

                os.remove( fn_back )
                logging.info("File %s converted to Doxygen: check differences before committing!" % Colt(fn).magenta())

        except (IOError,OSError) as e:
            # Report the problem and go on with the next file
            logging.error('File operation failed: %s' % e)

    return 0
918 | ||
06ccae0f | 919 | |
# Script entry point: the return value of main() becomes the exit status
if __name__ == '__main__':
    exit_status = main( sys.argv[1:] )
    sys.exit( exit_status )