]>
Commit | Line | Data |
---|---|---|
a4057fc9 | 1 | #!/usr/bin/env python |
2 | ||
3 | ## @package thtml2doxy_clang | |
4 | # Translates THtml C++ comments to Doxygen using libclang as parser. | |
5 | # | |
6 | # This code relies on Python bindings for libclang: libclang's interface is pretty unstable, and | |
7 | # its Python bindings are unstable as well. | |
8 | # | |
9 | # AST (Abstract Syntax Tree) traversal is performed entirely using libclang used as a C++ parser, | |
10 | # instead of attempting to write a parser ourselves. | |
11 | # | |
12 | # This code (especially AST traversal) was inspired by: | |
13 | # | |
14 | # - [Implementing a code generator with libclang](http://szelei.me/code-generator/) | |
15 | # (this refers to API calls used here) | |
16 | # - [Parsing C++ in Python with Clang](http://eli.thegreenplace.net/2011/07/03/parsing-c-in-python-with-clang) | |
17 | # (outdated, API calls described there do not work anymore, but useful to understand some basic | |
18 | # concepts) | |
19 | # | |
20 | # Usage: | |
21 | # | |
22 | # `thtml2doxy_clang file1 [file2 [file3...]]` | |
23 | # | |
24 | # @author Dario Berzano <dario.berzano@cern.ch> | |
25 | # @date 2014-12-05 | |
26 | ||
27 | ||
28 | import sys | |
29 | import os | |
30 | import re | |
62671ba0 | 31 | import logging |
32 | import getopt | |
a4057fc9 | 33 | import clang.cindex |
34 | ||
35 | ||
## Brain-dead color output for terminal.
#
#  A `str` subclass whose helper methods return the text wrapped between an ANSI color escape
#  sequence and the ANSI reset sequence, e.g. `Colt('hello').red()`.
class Colt(str):

    ## Text in red.
    def red(self):
        return self.color('\033[31m')

    ## Text in green.
    def green(self):
        return self.color('\033[32m')

    ## Text in yellow.
    def yellow(self):
        return self.color('\033[33m')

    ## Text in blue.
    def blue(self):
        return self.color('\033[34m')

    ## Text in magenta.
    def magenta(self):
        return self.color('\033[35m')

    ## Text in cyan.
    def cyan(self):
        return self.color('\033[36m')

    ## Wrap the text between an escape sequence and the ANSI reset sequence.
    #
    #  @param c Escape sequence to emit before the text
    #
    #  @return The text preceded by `c` and followed by the reset sequence `\033[m`
    def color(self, c):
        return c + self + '\033[m'
59 | ||
60 | ||
## Collect the comment block that opens the body of a C++ method.
#
#  We are looking for the following structure: method -> compound statement -> comment, i.e. we
#  need to extract the first comment in the compound statement composing the method. Consecutive
#  single-line comments on adjacent lines are merged; a multi-line comment is taken alone.
#
#  @param cursor A Clang parser cursor pointing to a C++ method
#
#  @return A tuple `(lines, first_line, last_line)`: the cleaned-up comment lines (empty list if
#          no comment was found) and the source lines where the comment starts and ends (-1 if
#          no comment was found)
def _collect_method_comment(cursor):

    comment = []
    comment_line_start = -1
    comment_line_end = -1
    in_compound_stmt = False

    for token in cursor.get_tokens():

        if token.cursor.kind != clang.cindex.CursorKind.COMPOUND_STMT:
            if in_compound_stmt:
                # first token not belonging directly to the body: the leading comment is over
                break
            # still scanning the method's signature
            continue

        in_compound_stmt = True

        if token.kind == clang.cindex.TokenKind.PUNCTUATION and token.spelling == '{':
            # opening brace: comments may still follow
            continue

        if token.kind != clang.cindex.TokenKind.COMMENT:
            # anything else than a comment closes the leading comment block
            break

        extent = token.extent
        line_start = extent.start.line
        line_end = extent.end.line
        is_multiline = line_end > line_start

        # once a comment has been collected, only a single-line comment on the very next line can
        # extend it
        if comment_line_end != -1 and (is_multiline or line_start != comment_line_end + 1):
            break

        if comment_line_start == -1:
            comment_line_start = line_start
        comment_line_end = line_end
        comment.extend(refactor_comment(token.spelling))

        # multiline comments are parsed in one go, therefore don't expect subsequent comments
        if is_multiline:
            break

    return comment, comment_line_start, comment_line_end


## Traverse the AST recursively starting from the current cursor.
#
#  Every visited cursor is reported at debug level; for C++ methods, the comment found at the
#  beginning of the method's body (if any) is reported at info level, one line per message.
#
#  @param cursor    A Clang parser cursor
#  @param recursion Current recursion depth
def traverse_ast(cursor, recursion=0):

    text = cursor.spelling or cursor.displayname
    # drop the enumeration's name, keep only the kind (the part after the first dot)
    kind = str(cursor.kind).split('.', 1)[-1]

    indent = ' ' * recursion

    if cursor.kind == clang.cindex.CursorKind.CXX_METHOD:

        # cursor ran into a C++ method
        logging.debug( "%s%s(%s)" % (indent, Colt(kind).magenta(), Colt(text).blue()) )

        comment, comment_line_start, comment_line_end = _collect_method_comment(cursor)

        # logging is done here and not in the helper: the log format includes the function name
        if comment:
            logging.info("Comment found for function %s" % Colt(text).magenta())
            for count_line, comment_line in enumerate(comment, comment_line_start):
                logging.info(
                    Colt("%s [%d:%d] " % (indent, count_line, comment_line_end)).green() +
                    Colt(comment_line).cyan()
                )

    else:

        logging.debug( "%s%s(%s)" % (indent, kind, text) )

    for child_cursor in cursor.get_children():
        traverse_ast(child_cursor, recursion+1)
a4057fc9 | 159 | |
533918c9 | 160 | |
## Remove garbage from comments.
#
#  Strips the C++ comment delimiters and the surrounding whitespace:
#
#  - `// text` (any number of slashes, optionally repeated at the end of the line)
#  - `/* text */` on a single line
#  - `/* ... */` spanning several lines: the opening delimiter is removed from the first line,
#    the closing one from the last line, lines in between are only stripped of whitespace
#
#  A line that does not match the expected pattern is left untouched. No THtml tag is converted
#  here.
#
#  @param comment The original comment, as a single string
#
#  @return A list of strings, one per line of the original comment
def refactor_comment(comment):

    resingle = r'^/{2,}\s*(.*?)\s*(/{2,})?\s*$'
    reblock = r'^/\*\s*(.*?)\s*\*/\s*$'
    remulti_first = r'^/\*\s*(.*?)\s*\*?\s*$'
    remulti_last = r'^\s*(.*?)\s*\*/$'

    new_comment = comment.split('\n')

    if len(new_comment) == 1:
        # one line only: either a C++-style comment, or a C-style comment opened and closed on the
        # same line
        msingle = re.search(resingle, comment) or re.search(reblock, comment)
        if msingle:
            new_comment[0] = msingle.group(1)
        return new_comment

    mfirst = re.search(remulti_first, new_comment[0])
    if mfirst:
        new_comment[0] = mfirst.group(1)

    mlast = re.search(remulti_last, new_comment[-1])
    if mlast:
        new_comment[-1] = mlast.group(1)

    new_comment[1:-1] = [line.strip() for line in new_comment[1:-1]]

    return new_comment
a4057fc9 | 192 | |
193 | ||
## The main function.
#
#  Parses the command line, locates libclang, then parses every input file as C++ and traverses
#  its AST.
#
#  Accepted options: `-d` (log level DEBUG) and `--debug=<level>`, where level is one of DEBUG,
#  INFO, WARNING, ERROR, CRITICAL. All other arguments are input files.
#
#  @param argv Command-line arguments, program name excluded
#
#  @return The executable's return value: 0 on success, 1 if the arguments are invalid, libclang
#          cannot be found or an input file cannot be parsed
def main(argv):

    # Setup logging on stderr
    log_level = logging.WARNING
    logging.basicConfig(
        level=log_level,
        format='%(levelname)-8s %(funcName)-20s %(message)s',
        stream=sys.stderr
    )

    # Parse command-line options
    try:
        opts, args = getopt.getopt( argv, 'd', [ 'debug=' ] )
        for o, a in opts:
            if o == '--debug':
                log_level = getattr( logging, a.upper(), None )
                if not isinstance(log_level, int):
                    raise getopt.GetoptError('log level must be one of: DEBUG, INFO, WARNING, ERROR, CRITICAL')
            elif o == '-d':
                log_level = logging.DEBUG
            else:
                # not an assert: asserts are stripped when running with -O
                raise getopt.GetoptError('Unhandled argument', o)
    except getopt.GetoptError as e:
        logging.fatal('Invalid arguments: %s' % e)
        return 1

    logging.getLogger('').setLevel(log_level)

    # Attempt to load libclang from a list of known locations
    libclang_locations = [
        '/usr/lib/llvm-3.5/lib/libclang.so.1',
        '/usr/lib/libclang.so',
        '/Library/Developer/CommandLineTools/usr/lib/libclang.dylib'
    ]
    libclang = next( (lib for lib in libclang_locations if os.path.isfile(lib)), None )

    if libclang is None:
        logging.fatal('Cannot find libclang')
        return 1

    clang.cindex.Config.set_library_file(libclang)

    # A single index is shared by all the translation units
    index = clang.cindex.Index.create()

    # Loop over all files
    for fn in args:

        logging.info('Input file: %s' % Colt(fn).magenta())
        try:
            translation_unit = index.parse(fn, args=['-x', 'c++'])
        except clang.cindex.TranslationUnitLoadError as e:
            # e.g. the input file does not exist or cannot be read
            logging.fatal('Cannot parse %s: %s' % (fn, e))
            return 1
        traverse_ast( translation_unit.cursor )

    return 0
252 | ||
253 | ||
# Run only when executed as a script: the return value of main() becomes the exit status
if __name__ == '__main__':
    sys.exit( main( sys.argv[1:] ) )