doxygen/thtml2doxy_clang.py

   1 #!/usr/bin/env python
   2
   3 ## @package thtml2doxy_clang
   4 #  Translates THtml C++ comments to Doxygen using libclang as parser.
   5 #
   6 #  This code relies on Python bindings for libclang: libclang's interface is pretty unstable, and
   7 #  its Python bindings are unstable as well.
   8 #
   9 #  AST (Abstract Syntax Tree) traversal is performed entirely by libclang, used as a C++ parser,
  10 #  instead of attempting to write a parser ourselves.
  11 #
  12 #  This code (especially AST traversal) was inspired by:
  13 #
  14 #   - [Implementing a code generator with libclang](http://szelei.me/code-generator/)
  15 #     (this refers to API calls used here)
  16 #   - [Parsing C++ in Python with Clang](http://eli.thegreenplace.net/2011/07/03/parsing-c-in-python-with-clang)
  17 #     (outdated, API calls described there do not work anymore, but useful to understand some basic
  18 #     concepts)
  19 #
  20 #  Usage:
  21 #
  22 #    `thtml2doxy_clang file1 [file2 [file3...]]`
  23 #
  24 #  @author Dario Berzano <dario.berzano@cern.ch>
  25 #  @date 2014-12-05
  26
  27
  28 import sys
  29 import os
  30 import re
  31 import logging
  32 import getopt
  33 import clang.cindex
  34
  35
  36 ## Brain-dead color output for terminal.
  37 class Colt(str):
  38
  39   def red(self):
  40     return self.color('\033[31m')
  41
  42   def green(self):
  43     return self.color('\033[32m')
  44
  45   def yellow(self):
  46     return self.color('\033[33m')
  47
  48   def blue(self):
  49     return self.color('\033[34m')
  50
  51   def magenta(self):
  52     return self.color('\033[35m')
  53
  54   def cyan(self):
  55     return self.color('\033[36m')
  56
  57   def color(self, c):
  58     return c + self + '\033[m'
  59
  60
  61 ## Traverse the AST recursively starting from the current cursor.
  62 #
  63 #  @param cursor    A Clang parser cursor
  64 #  @param recursion Current recursion depth
  65 def traverse_ast(cursor, recursion=0):
  66
  67   text = cursor.spelling or cursor.displayname
  68   kind = str(cursor.kind)[str(cursor.kind).index('.')+1:]
  69
  70   indent = ''
  71   for i in range(0, recursion):
  72     indent = indent + '  '
  73
  74   if cursor.kind == clang.cindex.CursorKind.CXX_METHOD:
  75
  76     # cursor ran into a C++ method
  77     logging.debug( "%s%s(%s)" % (indent, Colt(kind).magenta(), Colt(text).blue()) )
  78
  79     # we are looking for the following structure: method -> compound statement -> comment, i.e. we
  80     # need to extract the first comment in the compound statement composing the method
  81
  82     in_compound_stmt = False
  83     expect_comment = False
  84     last_comment_line = -1
  85
  86     for token in cursor.get_tokens():
  87
  88       if token.cursor.kind == clang.cindex.CursorKind.COMPOUND_STMT:
  89         if not in_compound_stmt:
  90           in_compound_stmt = True
  91           expect_comment = True
  92           last_comment_line = -1
  93       else:
  94         if in_compound_stmt:
  95           in_compound_stmt = False
  96           break
  97
  98       # tkind = str(token.kind)[str(token.kind).index('.')+1:]
  99       # ckind = str(token.cursor.kind)[str(token.cursor.kind).index('.')+1:]
 100
 101       if in_compound_stmt:
 102
 103         if expect_comment:
 104
 105           extent = token.extent
 106           line_start = extent.start.line
 107           line_end = extent.end.line
 108
 109           if token.kind == clang.cindex.TokenKind.PUNCTUATION and token.spelling == '{':
 110             pass
 111
 112           elif token.kind == clang.cindex.TokenKind.COMMENT and (last_comment_line == -1 or (line_start == last_comment_line+1 and line_end-line_start == 0)):
 113             #print Colt("%s  %s:%s = %s" % (indent, ckind, tkind, token.spelling)).green()
 114             last_comment_line = line_end
 115             new_comment = refactor_comment(token.spelling)
 116
 117             for comment_line in new_comment:
 118               logging.info(
 119                 Colt("%s  [%d-%d]" % (indent, line_start, line_end)).green() +
 120                 Colt(comment_line).cyan()
 121               )
 122
 123             # multiline comments are parsed in one go, therefore don't expect subsequent comments
 124             if line_end - line_start > 0:
 125               expect_comment = False
 126
 127           else:
 128             expect_comment = False
 129
 130       # else:
 131       #   print Colt("%s  %s:%s = %s" % (indent, ckind, tkind, token.spelling)).yellow()
 132
 133
 134   else:
 135
 136     logging.debug( "%s%s(%s)" % (indent, kind, text) )
 137
 138   for child_cursor in cursor.get_children():
 139     traverse_ast(child_cursor, recursion+1)
 140
 141 ## Remove garbage from comments and convert special tags from THtml to Doxygen.
 142 #
 143 #  @param comment The original comment
 144 def refactor_comment(comment):
 145
 146   resingle = r'^/{2,}\s*(.*?)\s*(/{2,})?\s*$'
 147   remulti_first = r'^/\*\s*(.*?)\s*\*?\s*$'
 148   remulti_last = r'^\s*(.*?)\s*\*/$'
 149
 150   new_comment = comment.split('\n')
 151
 152   if len(new_comment) == 1:
 153     msingle = re.search(resingle, comment)
 154     if msingle:
 155       new_comment[0] = msingle.group(1)
 156
 157   else:
 158
 159     for i in range(0, len(new_comment)):
 160       if i == 0:
 161         mmulti = re.search(remulti_first, new_comment[i])
 162         if mmulti:
 163           new_comment[i] = mmulti.group(1)
 164       elif i == len(new_comment)-1:
 165         mmulti = re.search(remulti_last, new_comment[i])
 166         if mmulti:
 167           new_comment[i] = mmulti.group(1)
 168       else:
 169         new_comment[i] = new_comment[i].strip()
 170
 171   return new_comment
 172
 173
 174 ## The main function.
 175 #
 176 #  Return value is the executable's return value.
 177 def main(argv):
 178
 179   # Setup logging on stderr
 180   log_level = logging.WARNING
 181   logging.basicConfig(
 182     level=log_level,
 183     format='%(levelname)-8s %(funcName)-20s %(message)s',
 184     stream=sys.stderr
 185   )
 186
 187   # Parse command-line options
 188   try:
 189     opts, args = getopt.getopt( argv, 'd', [ 'debug=' ] )
 190     for o, a in opts:
 191       if o == '--debug':
 192         log_level = getattr( logging, a.upper(), None )
 193         if not isinstance(log_level, int):
 194           raise getopt.GetoptError('log level must be one of: DEBUG, INFO, WARNING, ERROR, CRITICAL')
 195       elif o == '-d':
 196         log_level = logging.DEBUG
 197       else:
 198         assert False, 'Unhandled argument'
 199   except getopt.GetoptError as e:
 200     logging.fatal('Invalid arguments: %s' % e)
 201     return 1
 202
 203   logging.getLogger('').setLevel(log_level)
 204
 205   # Attempt to load libclang from a list of known locations
 206   libclang_locations = [
 207     '/usr/lib/llvm-3.5/lib/libclang.so.1',
 208     '/usr/lib/libclang.so',
 209     '/Library/Developer/CommandLineTools/usr/lib/libclang.dylib'
 210   ]
 211   libclang_found = False
 212
 213   for lib in libclang_locations:
 214     if os.path.isfile(lib):
 215       clang.cindex.Config.set_library_file(lib)
 216       libclang_found = True
 217       break
 218
 219   if not libclang_found:
 220     logging.fatal('Cannot find libclang')
 221     return 1
 222
 223   # Loop over all files
 224   for fn in args:
 225
 226     index = clang.cindex.Index.create()
 227     translation_unit = index.parse(fn, args=['-x', 'c++'])
 228     traverse_ast( translation_unit.cursor )
 229
 230   return 0
 231
 232
 233 if __name__ == '__main__':
 234   sys.exit( main( sys.argv[1:] ) )