aboutsummaryrefslogtreecommitdiffstats
path: root/libc/kernel/tools/cpp.py
diff options
context:
space:
mode:
authorThe Android Open Source Project <initial-contribution@android.com>2009-03-03 18:28:13 -0800
committerThe Android Open Source Project <initial-contribution@android.com>2009-03-03 18:28:13 -0800
commit1767f908af327fa388b1c66883760ad851267013 (patch)
tree4b825dc642cb6eb9a060e54bf8d69288fbee4904 /libc/kernel/tools/cpp.py
parenta799b53f10e5a6fd51fef4436cfb7ec99836a516 (diff)
downloadandroid_bionic-1767f908af327fa388b1c66883760ad851267013.tar.gz
android_bionic-1767f908af327fa388b1c66883760ad851267013.tar.bz2
android_bionic-1767f908af327fa388b1c66883760ad851267013.zip
auto import from //depot/cupcake/@135843
Diffstat (limited to 'libc/kernel/tools/cpp.py')
-rw-r--r--libc/kernel/tools/cpp.py2180
1 files changed, 0 insertions, 2180 deletions
diff --git a/libc/kernel/tools/cpp.py b/libc/kernel/tools/cpp.py
deleted file mode 100644
index 4b4bd3826..000000000
--- a/libc/kernel/tools/cpp.py
+++ /dev/null
@@ -1,2180 +0,0 @@
-# a glorified C pre-processor parser
-
-import sys, re, string
-from utils import *
-from defaults import *
-
-debugTokens = False
-debugDirectiveTokenizer = False
-debugLineParsing = False
-debugCppExpr = False
-debugOptimIf01 = False
-
-#####################################################################################
-#####################################################################################
-##### #####
-##### C P P T O K E N S #####
-##### #####
-#####################################################################################
-#####################################################################################
-
# the list of supported C-preprocessor tokens
# plus a couple of C tokens as well
#
# each token id is the literal text of the token, except for the
# pseudo-token ids at the end of this list.
#
# pseudo-tokens for end of input and end of line
tokEOF = "\0"
tokLN = "\n"
# preprocessor-specific operators
tokSTRINGIFY = "#"
tokCONCAT = "##"
# logical / shift / comparison operators
tokLOGICAND = "&&"
tokLOGICOR = "||"
tokSHL = "<<"
tokSHR = ">>"
tokEQUAL = "=="
tokNEQUAL = "!="
tokLT = "<"
tokLTE = "<="
tokGT = ">"
tokGTE = ">="
tokELLIPSIS = "..."
tokSPACE = " "
tokDEFINED = "defined"
tokLPAREN = "("
tokRPAREN = ")"
tokNOT = "!"
# arithmetic / bitwise operators
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
# punctuation
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
# pseudo-token ids used as the 'id' of multi-character token classes;
# the matching Token object keeps the original text in its 'value' field
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"
-
class Token:
    """Holds a single lexer token.

    Each token carries a position in the source text (lineno/colno), an
    'id' string naming the token's class, and a 'value' string with the
    original token text. For example the tokenizer collapses a run of
    spaces and tabs into one tokSPACE token whose value is the original
    spaces+tabs sequence."""

    def __init__(self):
        self.id = None     # token class, e.g. tokIDENT or a literal symbol
        self.value = None  # original source text of the token
        self.lineno = 0    # 1-based source line
        self.colno = 0     # 0-based source column

    def set(self, id, val=None):
        """set the token's id; 'val' defaults to the id itself when omitted"""
        self.id = id
        if not val:
            self.value = id
        else:
            self.value = val
        return None

    def copyFrom(self, src):
        """copy id, value and position from another Token"""
        for attr in ("id", "value", "lineno", "colno"):
            setattr(self, attr, getattr(src, attr))

    def __repr__(self):
        formats = {
            tokIDENT: "(ident %s)",
            tokNUMBER: "(number %s)",
            tokSTRING: "(string '%s')",
        }
        if self.id in formats:
            return formats[self.id] % self.value
        if self.id == tokLN:
            return "<LN>"
        if self.id == tokEOF:
            return "<EOF>"
        if self.id == tokSPACE and self.value == "\\":
            # a trailing \ that was transformed into a tokSPACE
            return "<\\>"
        return self.id

    def __str__(self):
        if self.id in (tokIDENT, tokNUMBER, tokSTRING):
            return self.value
        if self.id == tokEOF:
            return "<EOF>"
        if self.id == tokSPACE:
            if self.value == "\\":  # trailing \ line continuation
                return "\\\n"
            return self.value
        return self.id
-
class BadExpectedToken(Exception):
    """exception raised when the parser meets an unexpected token.

    bug fix: the original __init__ merely printed the message and never
    passed it to Exception, so str(exc) was empty and the message was lost
    to callers. It also required an argument, while CppExpr.throw() raises
    this class bare (py2 'raise exception' instantiates with no arguments),
    which used to raise TypeError; 'msg' now defaults to "" for that path."""

    def __init__(self, msg=""):
        Exception.__init__(self, msg)
        self.msg = msg
-
-#####################################################################################
-#####################################################################################
-##### #####
-##### C P P T O K E N C U R S O R #####
-##### #####
-#####################################################################################
-#####################################################################################
-
class TokenCursor:
    """a small class to iterate over a list of Token objects"""

    def __init__(self,tokens):
        self.tokens = tokens      # the Token list being iterated
        self.n = 0                # current position within self.tokens
        self.count = len(tokens)  # total number of tokens

    def set(self,n):
        """set the current position, clamped to the range [0, count]"""
        if n < 0:
            n = 0
        if n > self.count:
            n = self.count
        self.n = n

    def peekId(self):
        """retrieve the id of the current token (None past the end)"""
        if (self.n >= self.count):
            return None
        return self.tokens[self.n].id

    def peek(self):
        """retrieve the current token. does not change position
        (returns None past the end)"""
        if (self.n >= self.count):
            return None
        return self.tokens[self.n]

    def skip(self):
        """increase current token position (no-op at the end)"""
        if (self.n < self.count):
            self.n += 1

    def skipSpaces(self):
        """skip over all space tokens, this includes tokSPACE and tokLN"""
        while 1:
            tok = self.peekId()
            if tok != tokSPACE and tok != tokLN:
                break
            self.skip()

    def skipIfId(self,id):
        """skip an optional token with the given id"""
        if self.peekId() == id:
            self.skip()

    def expectId(self,id):
        """raise an exception if the current token hasn't a given id.
        otherwise skip over it"""
        # NOTE(review): peek() returns None at end of input, making the
        # attribute access below raise AttributeError instead of
        # BadExpectedToken — verify callers never reach EOF here
        tok = self.peek()
        if tok.id != id:
            raise BadExpectedToken, "%d:%d: '%s' expected, received '%s'" % (tok.lineno, tok.colno, id, tok.id)
        self.skip()

    def remain(self):
        """return the list of remaining tokens"""
        return self.tokens[self.n:]
-
-
-#####################################################################################
-#####################################################################################
-##### #####
-##### C P P T O K E N I Z E R #####
-##### #####
-#####################################################################################
-#####################################################################################
-
# list of long symbols, i.e. those that take more than one characters
# (CppTokenizer.nextRealToken() tries these in list order once the first
# character matches)
cppLongSymbols = [ tokCONCAT, tokLOGICAND, tokLOGICOR, tokSHL, tokSHR, tokELLIPSIS, tokEQUAL,\
                   tokNEQUAL, tokLTE, tokGTE, tokARROW, tokINCREMENT, tokDECREMENT ]
-
class CppTokenizer:
    """an abstract class used to convert some input text into a list
    of tokens. real implementations follow and differ in the format
    of the input text only (single line, list of lines, or file).

    derived classes override fillLineText() to feed lines in."""

    def __init__(self):
        """initialize a new CppTokenizer object"""
        self.eof = False  # end of file reached ?
        self.text = None  # content of current line, with final \n stripped
        self.line = 0     # number of current line
        self.pos = 0      # current character position in current line
        self.len = 0      # length of current line text
        self.held = Token()  # NOTE(review): appears unused in this file

    def setLineText(self,line):
        """set the content of the (next) current line. should be called
        by fillLineText() in derived classes"""
        self.text = line
        self.len = len(line)
        self.pos = 0

    def fillLineText(self):
        """refresh the content of 'line' with a new line of input"""
        # to be overriden; the default implementation reports end of input
        self.eof = True

    def markPos(self,tok):
        """mark the position of the current token in the source file"""
        if self.eof or self.pos > self.len:
            # the cursor already consumed the end-of-line marker, so the
            # token being marked will start on the following line
            tok.lineno = self.line + 1
            tok.colno = 0
        else:
            tok.lineno = self.line
            tok.colno = self.pos

    def peekChar(self):
        """return the current token under the cursor without moving it"""
        if self.eof:
            return tokEOF

        # lazily pull in the next line once the cursor has moved past the
        # virtual end-of-line position
        if self.pos > self.len:
            self.pos = 0
            self.line += 1
            self.fillLineText()
            if self.eof:
                return tokEOF

        # position == length corresponds to the stripped '\n'
        if self.pos == self.len:
            return tokLN
        else:
            return self.text[self.pos]

    def peekNChar(self,n):
        """try to peek the next n chars on the same line
        (returns None when fewer than n chars remain)"""
        if self.pos + n > self.len:
            return None
        return self.text[self.pos:self.pos+n]

    def skipChar(self):
        """increment the token cursor position"""
        if not self.eof:
            self.pos += 1

    def skipNChars(self,n):
        """skip n characters, falling back to one-by-one skipping when the
        current line does not hold n more characters"""
        if self.pos + n <= self.len:
            self.pos += n
        else:
            while n > 0:
                self.skipChar()
                n -= 1

    def nextChar(self):
        """retrieve the token at the current cursor position, then skip it"""
        result = self.peekChar()
        self.skipChar()
        return result

    def getEscape(self):
        """consume and return the characters following a backslash escape
        (greedily swallowing octal, hex and unicode escape digits)"""
        # try to get all characters after a backslash (\)
        result = self.nextChar()
        if result == "0":
            # octal number ?
            num = self.peekNChar(3)
            if num != None:
                isOctal = True
                for d in num:
                    if not d in "01234567":
                        isOctal = False
                        break
                if isOctal:
                    result += num
                    self.skipNChars(3)
        elif result == "x" or result == "X":
            # hex number ?
            num = self.peekNChar(2)
            if num != None:
                isHex = True
                for d in num:
                    # NOTE(review): this digit set omits '9' — looks like a
                    # typo for "0123456789abcdefABCDEF"; verify
                    if not d in "012345678abcdefABCDEF":
                        isHex = False
                        break
                if isHex:
                    result += num
                    self.skipNChars(2)
        elif result == "u" or result == "U":
            # unicode char ?
            num = self.peekNChar(4)
            if num != None:
                isHex = True
                for d in num:
                    # NOTE(review): same '9'-less digit set as above; verify
                    if not d in "012345678abcdefABCDEF":
                        isHex = False
                        break
                if isHex:
                    result += num
                    self.skipNChars(4)

        return result

    def nextRealToken(self,tok):
        """return next CPP token, used internally by nextToken()"""
        c = self.nextChar()
        if c == tokEOF or c == tokLN:
            return tok.set(c)

        if c == '/':
            c = self.peekChar()
            if c == '/': # C++ comment line
                self.skipChar()
                # consume up to the end of line; the comment collapses to
                # a single tokLN
                while 1:
                    c = self.nextChar()
                    if c == tokEOF or c == tokLN:
                        break
                return tok.set(tokLN)
            if c == '*': # C comment start
                self.skipChar()
                value = "/*"
                prev_c = None
                while 1:
                    c = self.nextChar()
                    if c == tokEOF:
                        #print "## EOF after '%s'" % value
                        return tok.set(tokEOF,value)
                    if c == '/' and prev_c == '*':
                        break
                    prev_c = c
                    value += c

                value += "/"
                #print "## COMMENT: '%s'" % value
                # a whole C comment becomes a single tokSPACE whose value
                # is the original comment text (embedded newlines included)
                return tok.set(tokSPACE,value)
            # not a comment after all: restore the '/' consumed above
            c = '/'

        if c.isspace():
            # concatenate consecutive spaces/tabs into one tokSPACE, but
            # never swallow the end-of-line marker
            while 1:
                c2 = self.peekChar()
                if c2 == tokLN or not c2.isspace():
                    break
                c += c2
                self.skipChar()
            return tok.set(tokSPACE,c)

        if c == '\\':
            if debugTokens:
                print "nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar())
            if self.peekChar() == tokLN: # trailing \
                # eat the tokLN
                self.skipChar()
                # we replace a trailing \ by a tokSPACE whose value is
                # simply "\\". this allows us to detect them later when
                # needed.
                return tok.set(tokSPACE,"\\")
            else:
                # treat as a single token here ?
                c +=self.getEscape()
                return tok.set(c)

        if c == "'": # chars
            c2 = self.nextChar()
            c += c2
            if c2 == '\\':
                c += self.getEscape()

            # accumulate until the closing quote (or EOF)
            while 1:
                c2 = self.nextChar()
                if c2 == tokEOF:
                    break
                c += c2
                if c2 == "'":
                    break

            # character literals are reported as tokSTRING as well
            return tok.set(tokSTRING, c)

        if c == '"': # strings
            quote = 0  # set when the previous char was an unescaped backslash
            while 1:
                c2 = self.nextChar()
                if c2 == tokEOF:
                    return tok.set(tokSTRING,c)

                c += c2
                if not quote:
                    if c2 == '"':
                        return tok.set(tokSTRING,c)
                    if c2 == "\\":
                        quote = 1
                else:
                    quote = 0

        if c >= "0" and c <= "9": # integers ?
            # greedily swallow alphanumerics and '_' (covers hex digits
            # and U/L suffixes)
            while 1:
                c2 = self.peekChar()
                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
                    break
                c += c2
                self.skipChar()
            return tok.set(tokNUMBER,c)

        if c.isalnum() or c == "_": # identifiers ?
            while 1:
                c2 = self.peekChar()
                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
                    break
                c += c2
                self.skipChar()
            if c == tokDEFINED:
                # the 'defined' keyword gets its own token id
                return tok.set(tokDEFINED)
            else:
                return tok.set(tokIDENT,c)

        # check special symbols
        # the first character matched; see if the following characters on
        # this line complete one of the known multi-character symbols
        for sk in cppLongSymbols:
            if c == sk[0]:
                sklen = len(sk[1:])
                if self.pos + sklen <= self.len and \
                    self.text[self.pos:self.pos+sklen] == sk[1:]:
                    self.pos += sklen
                    return tok.set(sk)

        # fall back: single-character token whose id is the character itself
        return tok.set(c)

    def nextToken(self,tok):
        """return the next token from the input text. this function
        really updates 'tok', and does not return a new one"""
        self.markPos(tok)
        self.nextRealToken(tok)

    def getToken(self):
        """allocate, fill and return a new Token"""
        tok = Token()
        self.nextToken(tok)
        if debugTokens:
            print "getTokens: %s" % repr(tok)
        return tok

    def toTokenList(self):
        """convert the input text of a CppTokenizer into a direct
        list of token objects. tokEOF is stripped from the result"""
        result = []
        while 1:
            tok = Token()
            self.nextToken(tok)
            if tok.id == tokEOF:
                break
            result.append(tok)
        return result
-
class CppLineTokenizer(CppTokenizer):
    """a CppTokenizer derived class that accepts a single line of text as input"""
    def __init__(self,line,lineno=1):
        # 'lineno' is the line number reported in the tokens' positions
        CppTokenizer.__init__(self)
        self.line = lineno
        self.setLineText(line)
-
-
class CppLinesTokenizer(CppTokenizer):
    """a CppTokenizer derived class that accepts a list of text lines as input.
    the lines must not have a trailing \n"""

    def __init__(self,lines=None,lineno=1):
        """initialize a CppLinesTokenizer. you can later add lines using addLine()"""
        CppTokenizer.__init__(self)
        # bug fix: the default used to be the mutable literal [], which is
        # shared across every default-constructed instance; addLine() then
        # appended into that shared list, leaking lines between tokenizers
        if lines is None:
            lines = []
        self.line = lineno      # line number reported for the first line
        self.lines = lines      # the lines still to be tokenized
        self.index = 0          # index of the next line to feed
        self.count = len(lines)

        if self.count > 0:
            self.fillLineText()
        else:
            self.eof = True

    def addLine(self,line):
        """add a line to a CppLinesTokenizer. this can be done after tokenization
        happens"""
        if self.count == 0:
            # no line was ever fed: make this one current immediately
            self.setLineText(line)
            self.index = 1
        self.lines.append(line)
        self.count += 1
        self.eof = False

    def fillLineText(self):
        # feed the next stored line, or flag end of input when exhausted
        if self.index < self.count:
            self.setLineText(self.lines[self.index])
            self.index += 1
        else:
            self.eof = True
-
-
class CppFileTokenizer(CppTokenizer):
    """a CppTokenizer derived class that reads its input from an open file object"""

    def __init__(self,file,lineno=1):
        CppTokenizer.__init__(self)
        self.file = file    # file object providing readline()
        self.line = lineno  # line number reported for the first line

    def fillLineText(self):
        # pull the next line from the file, or flag end of input
        text = self.file.readline()
        if not text:
            self.eof = True
            return
        # drop a single trailing newline, plus a carriage return that
        # immediately preceded it (DOS line endings)
        if text.endswith('\n'):
            text = text[:-1]
            if text.endswith('\r'):
                text = text[:-1]
        self.setLineText(text)
-
# Unit testing
#
class CppTokenizerTester:
    """a class used to test CppTokenizer classes, by checking the stream of
    tokens produced against expectations"""

    def __init__(self,tokenizer=None):
        self.tokenizer = tokenizer
        self.token = Token()  # scratch token reused by every expectation

    def setTokenizer(self,tokenizer):
        """switch to a new tokenizer under test"""
        self.tokenizer = tokenizer

    def expect(self,id):
        """check that the next token has the given id; identifier and number
        tokens also match on their value"""
        self.tokenizer.nextToken(self.token)
        tokid = self.token.id
        if tokid == id:
            return
        if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER):
            return
        raise BadExpectedToken, "### BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id)

    def expectToken(self,id,line,col):
        """like expect(), but also checks the token's recorded position"""
        self.expect(id)
        if self.token.lineno != line:
            raise BadExpectedToken, "### BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line)
        if self.token.colno != col:
            raise BadExpectedToken, "### BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col)

    def expectTokenVal(self,id,value,line,col):
        """like expectToken(), but also checks the token's value"""
        self.expectToken(id,line,col)
        if self.token.value != value:
            raise BadExpectedToken, "### BAD VALUE: '%s' expecting '%s'" % (self.token.value,value)

    def expectList(self,list):
        """check a sequence of token ids in order"""
        for item in list:
            self.expect(item)
-
def test_CppTokenizer():
    """self-test for the CppTokenizer classes; raises BadExpectedToken on
    the first mismatch, returns True on success"""
    print "running CppTokenizer tests"
    tester = CppTokenizerTester()

    # basic symbols, identifiers and numbers on a single line
    tester.setTokenizer( CppLineTokenizer("#an/example && (01923_xy)") )
    tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy", \
                        tokRPAREN, tokLN, tokEOF] )

    # the 'defined' keyword gets its own token id
    tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") )
    tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE,
                        tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] )

    # a multi-line C comment collapses to a single tokSPACE
    tester.setTokenizer( CppLinesTokenizer( ["/*", "#", "*/"] ) )
    tester.expectList( [ tokSPACE, tokLN, tokEOF ] )

    tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) )
    tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] )

    # token positions (line, column)
    tester.setTokenizer( CppLinesTokenizer( ["first second", "  third"] ) )
    tester.expectToken( "first", 1, 0 )
    tester.expectToken( tokSPACE, 1, 5 )
    tester.expectToken( "second", 1, 6 )
    tester.expectToken( tokLN, 1, 12 )
    tester.expectToken( tokSPACE, 2, 0 )
    tester.expectToken( "third", 2, 2 )

    # a C comment keeps its original text (newline included) as its value
    tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) )
    tester.expectList( [ "boo", tokSPACE ] )
    tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 )
    tester.expectList( [ tokLN, tokEOF ] )

    # a trailing backslash becomes a tokSPACE whose value is "\\"
    tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) )
    tester.expectToken( "an", 1, 0 )
    tester.expectToken( tokSPACE, 1, 2 )
    tester.expectTokenVal( tokSPACE, "\\", 1, 3 )
    tester.expectToken( tokSPACE, 2, 0 )
    tester.expectToken( "example", 2, 1 )
    tester.expectToken( tokLN, 2, 8 )

    return True
-
-
-#####################################################################################
-#####################################################################################
-##### #####
-##### C P P E X P R E S S I O N S #####
-##### #####
-#####################################################################################
-#####################################################################################
-
-# Cpp expressions are modeled by tuples of the form (op,arg) or (op,arg1,arg2), etc..
-# op is an "operator" string
-
class Expr:
    """a class used to model a CPP expression node.
    each node carries an 'op' string identifying the operation; subclasses
    (IntExpr, IdentExpr, CallExpr, TestExpr, ...) add the operands"""
    opInteger = "int"
    opIdent = "ident"
    opCall = "call"
    opDefined = "defined"
    opTest = "?"
    opLogicNot = "!"
    opNot = "~"
    opNeg = "[-]"
    opUnaryPlus = "[+]"
    opAdd = "+"
    opSub = "-"
    opMul = "*"
    opDiv = "/"
    opMod = "%"
    opAnd = "&"
    opOr = "|"
    opXor = "^"
    opLogicAnd = "&&"
    opLogicOr = "||"
    opEqual = "=="
    opNotEqual = "!="
    opLess = "<"
    opLessEq = "<="
    opGreater = ">"
    opGreaterEq = ">="
    opShl = "<<"
    opShr = ">>"

    unaries = [ opLogicNot, opNot, opNeg, opUnaryPlus ]
    # bug fix: opShl/opShr have precedences below but were missing from the
    # binary-operator list (compare CppExpr.binaries later in this file)
    binaries = [ opAdd, opSub, opMul, opDiv, opMod, opAnd, opOr, opXor, opLogicAnd, opLogicOr,
                 opEqual, opNotEqual, opLess, opLessEq, opGreater, opGreaterEq, opShl, opShr ]

    # operator precedences, larger values bind tighter
    precedences = {
        opTest: 0,
        opLogicOr: 1,
        # bug fix: this entry used to read 'opLogicNot: 2', which clashed
        # with the 'opLogicNot: 11' entry below (the later duplicate key
        # wins in a dict literal) and left opLogicAnd without a precedence;
        # the parallel CppExpr.precedences table maps "&&" to 2
        opLogicAnd: 2,
        opOr : 3,
        opXor: 4,
        opAnd: 5,
        opEqual: 6, opNotEqual: 6,
        opLess:7, opLessEq:7, opGreater:7, opGreaterEq:7,
        opShl:8, opShr:8,
        opAdd:9, opSub:9,
        opMul:10, opDiv:10, opMod:10,
        opLogicNot:11,
        opNot: 12,
    }

    def __init__(self,op):
        self.op = op

    def __repr__(self):
        return "(%s)" % self.op

    def __str__(self):
        return "operator(%s)" % self.op

    def precedence(self):
        """return the precedence of this node's operator (1000 when unknown)"""
        return Expr.precedences.get(self.op, 1000)

    def isUnary(self):
        """true when this node is a unary operator"""
        return self.op in Expr.unaries

    def isBinary(self):
        """true when this node is a binary operator"""
        return self.op in Expr.binaries

    def isDefined(self):
        """true when this node is a defined(MACRO) test"""
        # bug fix: the original compared against the bare name 'opDefined';
        # class attributes are not in scope inside a method body, so that
        # raised NameError whenever this method was called
        return self.op == Expr.opDefined

    def toInt(self):
        """return the integer value of a given expression. only valid for integer
        expressions, will return None otherwise"""
        return None
-
class IntExpr(Expr):
    """models an integer literal expression node"""
    def __init__(self,value):
        # bug fix: the original passed the bare name 'opInteger'; class
        # attributes of Expr are not in scope here, so every construction
        # raised NameError
        Expr.__init__(self, Expr.opInteger)
        self.arg = value  # the literal's source text, possibly with U/L suffixes

    def __repr__(self):
        return "(int %s)" % self.arg

    def __str__(self):
        return self.arg

    def toInt(self):
        """return the numeric value of the literal (suffixes stripped)"""
        s = self.arg # string value
        # get rid of U or L suffixes
        while len(s) > 0 and s[-1] in "LUlu":
            s = s[:-1]
        # string.atoi is deprecated (and removed in Python 3); int() is
        # identical for plain base-10 strings
        return int(s)
-
class IdentExpr(Expr):
    """models an identifier expression node"""
    def __init__(self,name):
        # bug fix: the original passed the bare name 'opIdent' (a class
        # attribute of Expr, not visible here), raising NameError
        Expr.__init__(self, Expr.opIdent)
        self.name = name

    def __repr__(self):
        return "(ident %s)" % self.name

    def __str__(self):
        return self.name
-
class CallExpr(Expr):
    """models a macro/function call expression node; 'params' is a list of
    parameter expressions"""
    def __init__(self,funcname,params):
        # bug fix: the original passed the bare name 'opCall' (a class
        # attribute of Expr, not visible here), raising NameError
        Expr.__init__(self, Expr.opCall)
        self.funcname = funcname
        self.params = params

    def __repr__(self):
        # same output as the original comma-joined accumulation loop
        return "(call %s [%s])" % (self.funcname,
                                   ",".join([repr(param) for param in self.params]))

    def __str__(self):
        return "%s(%s)" % (self.funcname,
                           ",".join([str(param) for param in self.params]))
-
class TestExpr(Expr):
    """models a ternary 'cond ? iftrue : iffalse' expression node"""
    def __init__(self,cond,iftrue,iffalse):
        # bug fix: the original passed the bare name 'opTest' (a class
        # attribute of Expr, not visible here), raising NameError
        Expr.__init__(self, Expr.opTest)
        self.cond = cond
        self.iftrue = iftrue
        self.iffalse = iffalse

    def __repr__(self):
        return "(?: %s %s %s)" % (repr(self.cond),repr(self.iftrue),repr(self.iffalse))

    def __str__(self):
        return "(%s) ? (%s) : (%s)" % (self.cond, self.iftrue, self.iffalse)
-
class SingleArgExpr(Expr):
    """base class for expression nodes carrying exactly one sub-expression"""

    def __init__(self, op, arg):
        Expr.__init__(self, op)
        self.arg = arg  # the single operand expression

    def __repr__(self):
        return "(" + self.op + " " + repr(self.arg) + ")"
-
class DefinedExpr(SingleArgExpr):
    """models a defined(MACRO) expression node"""
    def __init__(self,macroname):
        # bug fix: the original signature was (self, op, macroname) with a
        # dead 'op' parameter, and the call below forgot to pass 'self'
        # (SingleArgExpr.__init__(self.opDefined, macroname)), so every
        # construction raised a TypeError. The only call site in this file,
        # ExprParser.is_defined(), constructs DefinedExpr(macroname).
        SingleArgExpr.__init__(self, Expr.opDefined, macroname)

    def __str__(self):
        return "defined(%s)" % self.arg
-
-
class UnaryExpr(SingleArgExpr):
    """a single-operand operator node; 'opstr' is the text used when
    printing, defaulting to the op itself"""

    def __init__(self, op, arg, opstr=None):
        SingleArgExpr.__init__(self, op, arg)
        self.opstr = opstr or op

    def __str__(self):
        body = str(self.arg)
        # parenthesize the operand when it binds less tightly than we do
        if self.arg.precedence() < self.precedence():
            return "%s(%s)" % (self.opstr, body)
        return "%s%s" % (self.opstr, body)
-
class TwoArgExpr(Expr):
    """base class for expression nodes carrying exactly two sub-expressions"""

    def __init__(self, op, arg1, arg2):
        Expr.__init__(self, op)
        self.arg1 = arg1  # left operand
        self.arg2 = arg2  # right operand

    def __repr__(self):
        return "(" + " ".join([self.op, repr(self.arg1), repr(self.arg2)]) + ")"
-
class BinaryExpr(TwoArgExpr):
    """a two-operand infix operator node; 'opstr' is the text used when
    printing, defaulting to the op itself"""

    def __init__(self, op, arg1, arg2, opstr=None):
        TwoArgExpr.__init__(self, op, arg1, arg2)
        self.opstr = opstr or op

    def _render(self, arg):
        # render one operand, parenthesized when it binds less tightly
        # than this node
        text = str(arg)
        if arg.precedence() < self.precedence():
            return "(%s)" % text
        return text

    def __str__(self):
        return "%s %s %s" % (self._render(self.arg1),
                             self.opstr,
                             self._render(self.arg2))
-
-#####################################################################################
-#####################################################################################
-##### #####
-##### C P P E X P R E S S I O N P A R S E R #####
-##### #####
-#####################################################################################
-#####################################################################################
-
-
class ExprParser:
    """a class used to convert a list of tokens into a cpp Expr object.

    NOTE(review): this class looks unfinished — parse() returns None,
    is_call_or_ident() is a stub and self.throw() is never defined; the
    CppExpr class later in this file is the implementation actually used.
    """

    # regexps used by is_integer(); they expect the literal to be wrapped
    # in parentheses
    re_octal = re.compile(r"\s*\(0[0-7]+\).*")
    re_decimal = re.compile(r"\s*\(\d+[ulUL]*\).*")
    re_hexadecimal = re.compile(r"\s*\(0[xX][0-9a-fA-F]*\).*")

    def __init__(self,tokens):
        self.tok = tokens        # the Token list to parse
        self.n = len(self.tok)   # number of tokens
        self.i = 0               # current parse position

    def mark(self):
        """return the current position, to be restored with release()"""
        return self.i

    def release(self,pos):
        """restore a position previously returned by mark()"""
        self.i = pos

    def peekId(self):
        """return the id of the current token, or None at end of input"""
        if self.i < self.n:
            return self.tok[self.i].id
        return None

    def peek(self):
        """return the current token object, or None at end of input"""
        if self.i < self.n:
            return self.tok[self.i]
        return None

    def skip(self):
        """advance past the current token"""
        if self.i < self.n:
            self.i += 1

    def skipOptional(self,id):
        """advance past the current token only when it has the given id"""
        if self.i < self.n and self.tok[self.i].id == id:
            self.i += 1

    def skipSpaces(self):
        """skip over space and newline tokens"""
        i = self.i
        n = self.n
        tok = self.tok
        # NOTE(review): this compares the Token objects themselves against
        # the tokSPACE/tokLN strings instead of their .id field, so it can
        # never match — verify intent
        while i < n and (tok[i] == tokSPACE or tok[i] == tokLN):
            i += 1
        self.i = i

    # all the isXXX functions return a (expr,nextpos) pair if a match is found
    # or None if not

    def is_integer(self):
        """try to match an integer literal at the current position"""
        id = self.tok[self.i].id
        c = id[0]
        if c < '0' or c > '9':
            return None

        # NOTE(review): none of these patterns contains a capture group, so
        # m.end(1) raises an error whenever a match actually succeeds —
        # further evidence this class is vestigial; verify before use
        m = ExprParser.re_octal.match(id)
        if m:
            return (IntExpr(id), m.end(1))

        m = ExprParser.re_decimal.match(id)
        if m:
            return (IntExpr(id), m.end(1))

        # bug fix: the original called the compiled pattern object directly
        # (ExprParser.re_hexadecimal(id)), which raises TypeError; it meant
        # to call .match() like the two cases above
        m = ExprParser.re_hexadecimal.match(id)
        if m:
            return (IntExpr(id), m.end(1))

        return None

    def is_defined(self):
        """try to match a defined(MACRO) / defined MACRO construct"""
        id = self.tok[self.i].id
        if id != "defined":
            return None

        pos = self.mark()

        # NOTE(review): the 'defined' token itself is never skipped before
        # peeking at the next token, and self.throw() is not defined on
        # this class — verify before relying on this path
        use_paren = 0
        if self.peekId() == tokLPAREN:
            self.skip()
            use_paren = 1

        if self.peekId() != tokIDENT:
            self.throw( BadExpectedToken, "identifier expected")

        macroname = self.peek().value
        self.skip()
        if use_paren:
            self.skipSpaces()
            if self.peekId() != tokRPAREN:
                self.throw( BadExpectedToken, "missing right-paren after 'defined' directive")
            self.skip()

        i = self.i
        return (DefinedExpr(macroname),i+1)

    def is_call_or_ident(self):
        # stub, never implemented
        pass

    def parse(self, i):
        # stub, never implemented
        return None
-
-#####################################################################################
-#####################################################################################
-##### #####
-##### C P P E X P R E S S I O N S #####
-##### #####
-#####################################################################################
-#####################################################################################
-
class CppInvalidExpression(Exception):
    """raised when an invalid or unsupported cpp expression is detected"""
-
class CppExpr:
    """a class that models the condition of #if directives into
    an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2)
    where "op" is a string describing the operation"""

    unaries = [ "!", "~" ]
    binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", "&", "|", "^", "<<", ">>", "==", "!=" ]
    # operator precedences; larger values bind tighter
    precedences = { "||": 1,
                    "&&": 2,
                    "|": 3,
                    "^": 4,
                    "&": 5,
                    "==":6, "!=":6,
                    "<":7, "<=":7, ">":7, ">=":7,
                    "<<":8, ">>":8,
                    "+":9, "-":9,
                    "*":10, "/":10, "%":10,
                    "!":11, "~":12
                    }

    def __init__(self, tokens):
        """initialize a CppExpr. 'tokens' must be a CppToken list"""
        self.tok = tokens      # token list being parsed
        self.n = len(tokens)   # number of tokens
        if debugCppExpr:
            print "CppExpr: trying to parse %s" % repr(tokens)
        # parse the whole token list into a single (op, ...) tuple
        expr = self.is_expr(0)
        if debugCppExpr:
            print "CppExpr: got " + repr(expr)
        # NOTE(review): is_expr() can return None on a parse failure, which
        # would make this subscript raise TypeError — verify callers always
        # pass a parseable token list
        self.expr = expr[0]

    # matches a run of identifier/number characters
    re_cpp_constant = re.compile(r"((\d|\w|_)+)")
-
    def throw(self,exception,i,msg):
        """print a diagnostic carrying the position of token i, then raise
        the given exception class"""
        if i < self.n:
            tok = self.tok[i]
            print "%d:%d: %s" % (tok.lineno,tok.colno,msg)
        else:
            print "EOF: %s" % msg
        # NOTE(review): raises the exception *class* bare, i.e. it must be
        # constructible with no arguments
        raise exception
-
- def skip_spaces(self,i):
- """skip spaces in input token list"""
- while i < self.n:
- t = self.tok[i]
- if t.id != tokSPACE and t.id != tokLN:
- break
- i += 1
- return i
-
- def expectId(self,i,id):
- """check that a given token id is at the current position, then skip over it"""
- i = self.skip_spaces(i)
- if i >= self.n or self.tok[i].id != id:
- self.throw(BadExpectedToken,i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[i].id))
- return i+1
-
- def expectIdent(self,i):
- i = self.skip_spaces(i)
- if i >= self.n or self.tok[i].id != tokIDENT:
- self.throw(BadExpectedToken,i,"### expecting identifier in expression, got '%s'" % (id, self.tok[i].id))
- return i+1
-
- # the is_xxxxx function returns either None or a pair (e,nextpos)
- # where 'e' is an expression tuple (e.g. (op,arg)) and 'nextpos' is
- # the corresponding next position in the input token list
- #
-
- def is_decimal(self,i):
- v = self.tok[i].value[:]
- while len(v) > 0 and v[-1] in "ULul":
- v = v[:-1]
- for digit in v:
- if not digit.isdigit():
- return None
-
- # for an integer expression tuple, the argument
- # is simply the value as an integer
- val = string.atoi(v)
- return ("int", val), i+1
-
- def is_hexadecimal(self,i):
- v = self.tok[i].value[:]
- while len(v) > 0 and v[-1] in "ULul":
- v = v[:-1]
- if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"):
- for digit in v[2:]:
- if not digit in "0123456789abcdefABCDEF":
- return None
-
- # for an hex expression tuple, the argument
- # is the value as an integer
- val = int(v[2:], 16)
- return ("hex", val), i+1
-
- return None
-
- def is_integer(self,i):
- if self.tok[i].id != tokNUMBER:
- return None
-
- c = self.is_decimal(i)
- if c: return c
-
- c = self.is_hexadecimal(i)
- if c: return c
-
- return None
-
- def is_number(self,i):
- t = self.tok[i]
- if t.id == tokMINUS and i+1 < self.n:
- c = self.is_integer(i+1)
- if c:
- e, i2 = c
- op, val = e
- return (op, -val), i2
- if t.id == tokPLUS and i+1 < self.n:
- c = self.is_integer(i+1)
- if c: return c
-
- return self.is_integer(i)
-
-
    def is_alnum(self,i):
        """test wether a given token is alpha-numeric.
        returns (("ident", text), nextpos) or None"""
        i = self.skip_spaces(i)
        if i >= self.n:
            return None
        t = self.tok[i]
        # NOTE(review): this matches against the token *id*, not its value;
        # identifier/number tokens carry the pseudo-ids "<ident>"/"<number>"
        # which this regexp does not match — verify intent
        m = CppExpr.re_cpp_constant.match(t.id)
        if m:
            #print "... alnum '%s'" % m.group(1)
            r = m.group(1)
            return ("ident", r), i+1
        return None
-
    def is_defined(self,i):
        """try to parse a defined(MACRO) / defined MACRO construct at
        position i. returns (("defined", macroname), nextpos) or None"""
        t = self.tok[i]
        if t.id != tokDEFINED:
            return None

        # we have the defined keyword, check the rest
        i = self.skip_spaces(i+1)
        use_parens = 0
        if i < self.n and self.tok[i].id == tokLPAREN:
            use_parens = 1
            i = self.skip_spaces(i+1)

        # NOTE(review): CppConstantExpected is not defined anywhere in this
        # file — presumably it comes from the 'from utils/defaults import *'
        # star imports; verify, otherwise these paths raise NameError
        if i >= self.n:
            self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name or left paren")

        t = self.tok[i]
        if t.id != tokIDENT:
            self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name")

        i += 1
        if use_parens:
            i = self.expectId(i,tokRPAREN)

        return ("defined",t.value), i
-
-
    def is_call_or_ident(self,i):
        """try to parse either a bare identifier or a macro call at
        position i. returns (("ident", name), nextpos) or
        (("call", (name, params)), nextpos) where each param is the
        sub-list of tokens forming one argument, or None"""
        i = self.skip_spaces(i)
        if i >= self.n:
            return None

        t = self.tok[i]
        if t.id != tokIDENT:
            return None

        name = t.value

        i = self.skip_spaces(i+1)
        if i >= self.n or self.tok[i].id != tokLPAREN:
            # no opening paren follows: plain identifier
            return ("ident", name), i

        params = []
        depth = 1     # nesting depth of parentheses inside the argument list
        i += 1
        j = i         # start position of the current parameter
        while i < self.n:
            id = self.tok[i].id
            if id == tokLPAREN:
                depth += 1
            elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
                # end of one parameter: trim leading spaces (advance j)...
                while j < i and self.tok[j].id == tokSPACE:
                    j += 1
                k = i
                # ...and trailing spaces (retreat k), then record the slice
                while k > j and self.tok[k-1].id == tokSPACE:
                    k -= 1
                param = self.tok[j:k]
                params.append( param )
                if id == tokRPAREN:
                    break
                j = i+1
            elif id == tokRPAREN:
                # closing paren of a nested group
                depth -= 1
            i += 1

        if i >= self.n:
            # ran out of tokens before the closing paren
            return None

        return ("call", (name, params)), i+1
-
- def is_token(self,i,token):
- i = self.skip_spaces(i)
- if i >= self.n or self.tok[i].id != token:
- return None
- return token, i+1
-
-
    def is_value(self,i):
        """try to parse a primary value at position i: a string literal, a
        number, a defined() test, a call/identifier, or a parenthesized
        sub-expression. returns (node, nextpos) or None"""
        t = self.tok[i]
        if t.id == tokSTRING:
            return ("string", t.value), i+1

        c = self.is_number(i)
        if c: return c

        c = self.is_defined(i)
        if c: return c

        c = self.is_call_or_ident(i)
        if c: return c

        # only a parenthesized sub-expression remains possible
        i = self.skip_spaces(i)
        if i >= self.n or self.tok[i].id != tokLPAREN:
            return None

        # find the matching closing parenthesis
        popcount = 1
        i2 = i+1
        while i2 < self.n:
            t = self.tok[i2]
            if t.id == tokLPAREN:
                popcount += 1
            elif t.id == tokRPAREN:
                popcount -= 1
                if popcount == 0:
                    break
            i2 += 1

        if popcount != 0:
            self.throw(CppInvalidExpression, i, "expression missing closing parenthesis")

        if debugCppExpr:
            print "CppExpr: trying to parse sub-expression %s" % repr(self.tok[i+1:i2])
        # temporarily clamp self.n so the recursive parse stops exactly at
        # the closing parenthesis
        oldcount = self.n
        self.n = i2
        c = self.is_expr(i+1)
        self.n = oldcount
        if not c:
            self.throw(CppInvalidExpression, i, "invalid expression within parenthesis")

        e, i = c
        return e, i2+1
-
- def is_unary(self,i):
- i = self.skip_spaces(i)
- if i >= self.n:
- return None
-
- t = self.tok[i]
- if t.id in CppExpr.unaries:
- c = self.is_unary(i+1)
- if not c:
- self.throw(CppInvalidExpression, i, "%s operator must be followed by value" % t.id)
- e, i = c
- return (t.id, e), i
-
- return self.is_value(i)
-
    def is_binary(self,i):
        """try to parse a binary expression (or a lone unary/value) at
        position i. returns (node, nextpos) or None.

        NOTE(review): operator chains are parsed right-associatively with
        no relative-precedence handling ('a - b + c' becomes
        (a - (b + c))) — verify this is acceptable for the expressions
        this tool consumes"""
        i = self.skip_spaces(i)
        if i >= self.n:
            return None

        c = self.is_unary(i)
        if not c:
            return None

        e1, i2 = c
        i2 = self.skip_spaces(i2)
        if i2 >= self.n:
            # nothing follows the first operand: it is the whole expression
            return c

        t = self.tok[i2]
        if t.id in CppExpr.binaries:
            c = self.is_binary(i2+1)
            if not c:
                self.throw(CppInvalidExpression, i,"### %s operator must be followed by value" % t.id )
            e2, i3 = c
            return (t.id, e1, e2), i3

        # NOTE(review): a trailing non-binary token makes the whole parse
        # fail (the first operand is discarded) — apparently deliberate
        # strictness; verify
        return None

    def is_expr(self,i):
        """parse a full expression at position i (entry point of the
        recursive-descent parser)"""
        return self.is_binary(i)
-
    def dump_node(self,e):
        """return a debug (lisp-like) string form of an expression tuple"""
        op = e[0]
        line = "(" + op
        if op == "int":
            line += " %d)" % e[1]
        elif op == "hex":
            line += " 0x%x)" % e[1]
        elif op == "ident":
            line += " %s)" % e[1]
        elif op == "defined":
            line += " %s)" % e[1]
        elif op == "call":
            # e[1] is (funcname, list-of-param-token-lists)
            arg = e[1]
            line += " %s [" % arg[0]
            prefix = ""
            for param in arg[1]:
                par = ""
                for tok in param:
                    par += str(tok)
                line += "%s%s" % (prefix, par)
                prefix = ","
            line += "])"
        elif op in CppExpr.unaries:
            line += " %s)" % self.dump_node(e[1])
        elif op in CppExpr.binaries:
            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
        else:
            # unknown node kind: dump its raw representation
            line += " ?%s)" % repr(e[1])

        return line

    def __repr__(self):
        """debug form of the whole parsed expression"""
        return self.dump_node(self.expr)
-
- def source_node(self,e):
- op = e[0]
- if op == "int":
- return "%d" % e[1]
- if op == "hex":
- return "0x%x" % e[1]
- if op == "ident":
- # XXX: should try to expand
- return e[1]
- if op == "defined":
- return "defined(%s)" % e[1]
-
- prec = CppExpr.precedences.get(op,1000)
- arg = e[1]
- if op in CppExpr.unaries:
- arg_src = self.source_node(arg)
- arg_op = arg[0]
- arg_prec = CppExpr.precedences.get(arg[0],1000)
- if arg_prec < prec:
- return "!(" + arg_src + ")"
- else:
- return "!" + arg_src
- if op in CppExpr.binaries:
- arg2 = e[2]
- arg1_op = arg[0]
- arg2_op = arg2[0]
- arg1_src = self.source_node(arg)
- arg2_src = self.source_node(arg2)
- if CppExpr.precedences.get(arg1_op,1000) < prec:
- arg1_src = "(%s)" % arg1_src
- if CppExpr.precedences.get(arg2_op,1000) < prec:
- arg2_src = "(%s)" % arg2_src
-
- return "%s %s %s" % (arg1_src, op, arg2_src)
- return "???"
-
- def __str__(self):
- return self.source_node(self.expr)
-
- def int_node(self,e):
- if e[0] == "int":
- return e[1]
- elif e[1] == "hex":
- return int(e[1],16)
- else:
- return None
-
- def toInt(self):
- return self.int_node(self.expr)
-
- def optimize_node(self,e,macros={}):
- op = e[0]
- if op == "defined":
- name = e[1]
- if macros.has_key(name):
- if macros[name] == kCppUndefinedMacro:
- return ("int", 0)
- else:
- return ("int", 1)
-
- if kernel_remove_config_macros and name.startswith("CONFIG_"):
- return ("int", 0)
-
- elif op == "!":
- op, v = e
- v = self.optimize_node(v, macros)
- if v[0] == "int":
- if v[1] == 0:
- return ("int", 1)
- else:
- return ("int", 0)
-
- elif op == "&&":
- op, l, r = e
- l = self.optimize_node(l, macros)
- r = self.optimize_node(r, macros)
- li = self.int_node(l)
- ri = self.int_node(r)
- if li != None:
- if li == 0:
- return ("int", 0)
- else:
- return r
-
- elif op == "||":
- op, l, r = e
- l = self.optimize_node(l, macros)
- r = self.optimize_node(r, macros)
- li = self.int_node(l)
- ri = self.int_node(r)
- if li != None:
- if li == 0:
- return r
- else:
- return ("int", 1)
- elif ri != None:
- if ri == 0:
- return l
- else:
- return ("int", 1)
- return e
-
- def optimize(self,macros={}):
- self.expr = self.optimize_node(self.expr,macros)
-
- def removePrefixedNode(self,e,prefix,names):
- op = e[0]
- if op == "defined":
- name = e[1]
- if name.startswith(prefix):
- if names.has_key[name] and names[name] == "y":
- return ("int", 1)
- else:
- return ("int", 0)
-
- elif op in CppExpr.unaries:
- op, v = e
- v = self.removePrefixedNode(v,prefix,names)
- return (op, v)
- elif op in CppExpr.binaries:
- op, v1, v2 = e
- v1 = self.removePrefixedNode(v1,prefix,names)
- v2 = self.removePrefixedNode(v2,prefix,names)
- return (op, v1, v2)
- elif op == "call":
- func, params = e[1]
- params2 = []
- for param in params:
- params2.append( self.removePrefixedNode(param,prefix,names) )
- return (op, (func, params2))
-
- return e
-
- def removePrefixed(self,prefix,names={}):
- self.expr = self.removePrefixedNode(self.expr,prefix,names)
-
- def is_equal_node(self,e1,e2):
- if e1[0] != e2[0] or len(e1) != len(e2):
- return False
-
- op = e1[0]
- if op == "int" or op == "hex" or op == "!" or op == "defined":
- return e1[0] == e2[0]
-
- return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2])
-
- def is_equal(self,other):
- return self.is_equal_node(self.expr,other.expr)
-
def test_cpp_expr(expr, expected):
    """Unit-test helper: parse 'expr' and check that its tree dump (see
    CppExpr.dump_node) matches 'expected'; print a KO line on mismatch."""
    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
    #print repr(e.expr)
    s1 = repr(e)
    if s1 != expected:
        print "KO: expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
    else:
        #print "OK: expression '%s'" % expr
        pass
-
def test_cpp_expr_optim(expr, expected, macros={}):
    """Unit-test helper: parse 'expr', optimize it with the known 'macros'
    dict, and check the optimized tree dump matches 'expected'."""
    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
    e.optimize(macros)

    s1 = repr(e)
    if s1 != expected:
        print "KO: optimized expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
    else:
        #print "OK: optmized expression '%s'" % expr
        pass
-
def test_cpp_expr_source(expr, expected):
    """Unit-test helper: parse 'expr' and check that converting it back to
    C source (see CppExpr.source_node) yields 'expected'."""
    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
    s1 = str(e)
    if s1 != expected:
        print "KO: source expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
    else:
        #print "OK: source expression '%s'" % expr
        pass
-
def test_CppExpr():
    """Run the CppExpr unit tests: parsing, constant-folding optimization,
    and conversion back to C source."""
    print "testing CppExpr"
    # parsing -> tree dump
    test_cpp_expr( "0", "(int 0)" )
    test_cpp_expr( "1", "(int 1)" )
    test_cpp_expr( "1 && 1", "(&& (int 1) (int 1))" )
    test_cpp_expr( "1 && 0", "(&& (int 1) (int 0))" )
    test_cpp_expr( "EXAMPLE", "(ident EXAMPLE)" )
    test_cpp_expr( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
    test_cpp_expr( "defined(EXAMPLE)", "(defined EXAMPLE)" )
    test_cpp_expr( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
    test_cpp_expr( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
    test_cpp_expr( "FOO(BAR)", "(call FOO [BAR])" )

    # optimization with (optionally) known macro values
    test_cpp_expr_optim( "0", "(int 0)" )
    test_cpp_expr_optim( "1", "(int 1)" )
    test_cpp_expr_optim( "1 && 1", "(int 1)" )
    test_cpp_expr_optim( "1 && 0", "(int 0)" )
    test_cpp_expr_optim( "0 && 1", "(int 0)" )
    test_cpp_expr_optim( "0 && 0", "(int 0)" )
    test_cpp_expr_optim( "1 || 1", "(int 1)" )
    test_cpp_expr_optim( "1 || 0", "(int 1)" )
    test_cpp_expr_optim( "0 || 1", "(int 1)" )
    test_cpp_expr_optim( "0 || 0", "(int 0)" )
    test_cpp_expr_optim( "EXAMPLE", "(ident EXAMPLE)" )
    test_cpp_expr_optim( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
    test_cpp_expr_optim( "defined(EXAMPLE)", "(defined EXAMPLE)" )
    test_cpp_expr_optim( "defined(EXAMPLE)", "(int 1)", { "EXAMPLE": "XOWOE" } )
    test_cpp_expr_optim( "defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro} )
    test_cpp_expr_optim( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
    test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 0)", { "EXAMPLE" : "XOWOE" } )
    test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro } )
    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "ABC" : "1" } )
    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "BINGO" : "1" } )
    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(defined ABC)", { "BINGO" : kCppUndefinedMacro } )
    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 0)", { "ABC" : kCppUndefinedMacro, "BINGO" : kCppUndefinedMacro } )

    # conversion back to C source text
    test_cpp_expr_source( "0", "0" )
    test_cpp_expr_source( "1", "1" )
    test_cpp_expr_source( "1 && 1", "1 && 1" )
    test_cpp_expr_source( "1 && 0", "1 && 0" )
    test_cpp_expr_source( "0 && 1", "0 && 1" )
    test_cpp_expr_source( "0 && 0", "0 && 0" )
    test_cpp_expr_source( "1 || 1", "1 || 1" )
    test_cpp_expr_source( "1 || 0", "1 || 0" )
    test_cpp_expr_source( "0 || 1", "0 || 1" )
    test_cpp_expr_source( "0 || 0", "0 || 0" )
    test_cpp_expr_source( "EXAMPLE", "EXAMPLE" )
    test_cpp_expr_source( "EXAMPLE - 3", "EXAMPLE - 3" )
    test_cpp_expr_source( "defined(EXAMPLE)", "defined(EXAMPLE)" )
    test_cpp_expr_source( "defined EXAMPLE", "defined(EXAMPLE)" )
-
-
-#####################################################################################
-#####################################################################################
-##### #####
-##### C P P B L O C K #####
-##### #####
-#####################################################################################
-#####################################################################################
-
class Block:
    """A class used to model a block of input source text.

    There are two block types:
      - directive blocks: contain the tokens of a single pre-processor
        directive (e.g. #if)
      - text blocks: contain the tokens of non-directive source text

    The cpp parser class below will transform an input source file into a
    list of Block objects (grouped in a BlockList object for convenience).
    """

    def __init__(self,tokens,directive=None,lineno=0):
        """Initialize a new block; if 'directive' is None, this is a text block.

        NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)'
              and '#ifndef MACRO' into '#if !defined(MACRO)' by prepending the
              corresponding tokens, so later passes only ever see '#if'."""
        if directive == "ifdef":
            # prepend a 'defined' token
            tok = Token()
            tok.set(tokDEFINED)
            tokens = [ tok ] + tokens
            directive = "if"

        elif directive == "ifndef":
            # prepend '!' and 'defined' tokens
            tok1 = Token()
            tok2 = Token()
            tok1.set(tokNOT)
            tok2.set(tokDEFINED)
            tokens = [ tok1, tok2 ] + tokens
            directive = "if"

        self.tokens = tokens
        self.directive = directive
        # use the caller-supplied line number when valid, otherwise the
        # line number of the first token
        if lineno > 0:
            self.lineno = lineno
        else:
            self.lineno = self.tokens[0].lineno

        # #if-like blocks get their condition parsed eagerly
        if self.isIf():
            self.expr = CppExpr( self.tokens )

    def isDirective(self):
        """Return True iff this is a directive block."""
        return self.directive != None

    def isConditional(self):
        """Return True iff this is a conditional directive block."""
        return self.directive in ["if","ifdef","ifndef","else","elif","endif"]

    def isDefine(self):
        """Return the macro name in a #define directive, or None otherwise."""
        if self.directive != "define":
            return None

        return self.tokens[0].value

    def isIf(self):
        """Return True iff this is an #if-like directive block (one that
        carries a parsed 'expr' attribute)."""
        return self.directive in ["if","ifdef","ifndef","elif"]

    def isInclude(self):
        """Check whether this is a #include directive. If true, return the
        corresponding file name (with brackets or double-quotes), None otherwise."""
        if self.directive != "include":
            return None

        #print "iii " + repr(self.tokens)
        if self.tokens[0].id == tokSTRING:
            # a double-quote include, that's easy
            return self.tokens[0].value

        # we only want the bracket part, not any comments or junk after it
        if self.tokens[0].id == "<":
            i = 0
            tok = self.tokens
            n = len(tok)
            while i < n and tok[i].id != ">":
                i += 1

            if i >= n:
                # unterminated '<...' include
                return None

            return string.join([ str(x) for x in tok[:i+1] ],"")

        else:
            return None

    def __repr__(self):
        """Generate the debug representation of a given block."""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                result += repr(self.expr)
            else:
                for tok in self.tokens:
                    result += repr(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += repr(tok)

        return result

    def __str__(self):
        """Generate the source string representation of a given block."""
        if self.directive:
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                # when the condition is a plain (!)defined(MACRO) test
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)
            else:
                result = "#%s" % self.directive
                if len(self.tokens):
                    result += " "
                for tok in self.tokens:
                    result += str(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += str(tok)

        return result
-
-
class BlockList:
    """A convenience class used to hold and process a list of blocks
    returned by the cpp parser."""
    def __init__(self,blocks):
        self.blocks = blocks

    def __len__(self):
        return len(self.blocks)

    def __getitem__(self,n):
        return self.blocks[n]

    def __repr__(self):
        return repr(self.blocks)

    def __str__(self):
        # concatenate block sources; directives do not carry their own
        # trailing newline, so add one after each
        result = ""
        for b in self.blocks:
            result += str(b)
            if b.isDirective():
                result += '\n'
        return result

    def optimizeIf01(self):
        """Remove the code between #if 0 .. #endif in a BlockList."""
        self.blocks = optimize_if01(self.blocks)

    def optimizeMacros(self, macros):
        """Remove known defined and undefined macros from a BlockList."""
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

    def removeMacroDefines(self,macros):
        """Remove known macro definitions from a BlockList."""
        self.blocks = remove_macro_defines(self.blocks,macros)

    def removePrefixed(self,prefix,names):
        """Replace defined(<prefix>XXX) tests in all #if conditions by
        constants, according to the 'names' configuration dict."""
        for b in self.blocks:
            if b.isIf():
                b.expr.removePrefixed(prefix,names)

    def optimizeAll(self,macros):
        """Fold known macros into #if conditions, then drop dead #if 0 code."""
        self.optimizeMacros(macros)
        self.optimizeIf01()
        return

    def findIncludes(self):
        """Return the list of included files in a BlockList."""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)

        return result


    def write(self,out):
        """Write the source form of all blocks to the 'out' stream."""
        out.write(str(self))

    def removeComments(self):
        """Collapse each space/comment token to a single space character
        (comment text is stored in tokSPACE tokens' values)."""
        for b in self.blocks:
            for tok in b.tokens:
                if tok.id == tokSPACE:
                    tok.value = " "

    def removeEmptyLines(self):
        """Collapse runs of empty lines in text blocks to at most one, and
        strip trailing whitespace from non-empty lines."""
        # state = 1 => previous line was tokLN
        # state = 0 => previous line was directive
        state = 1
        for b in self.blocks:
            if b.isDirective():
                #print "$$$ directive %s" % str(b)
                state = 0
            else:
                # a tokLN followed by spaces is replaced by a single tokLN
                # several successive tokLN are replaced by a single one
                #
                dst = []
                src = b.tokens
                n = len(src)
                i = 0
                #print "$$$ parsing %s" % repr(src)
                while i < n:
                    # find final tokLN
                    j = i
                    while j < n and src[j].id != tokLN:
                        j += 1

                    if j >= n:
                        # uhhh: no newline at all, keep the tail as-is
                        dst += src[i:]
                        break

                    if src[i].id == tokSPACE:
                        # skip over the run of leading spaces
                        k = i+1
                        while src[k].id == tokSPACE:
                            k += 1

                        if k == j: # empty lines with spaces in it
                            i = j # remove the spaces

                    if i == j:
                        # an empty line
                        if state == 1:
                            i += 1 # remove it
                        else:
                            state = 1
                            dst.append(src[i])
                            i += 1
                    else:
                        # this line is not empty, remove trailing spaces
                        k = j
                        while k > i and src[k-1].id == tokSPACE:
                            k -= 1

                        nn = i
                        while nn < k:
                            dst.append(src[nn])
                            nn += 1
                        dst.append(src[j])
                        state = 0
                        i = j+1

                b.tokens = dst

    def removeVarsAndFuncs(self,knownStatics=set()):
        """Remove all extern and static declarations corresponding
        to variable and function declarations. We only accept typedefs
        and enum/structs/union declarations.

        However, we keep the definitions corresponding to the set
        of known static inline functions in the set 'knownStatics',
        which is useful for optimized byteorder swap functions and
        stuff like that.
        """
        # state = 1 => typedef/struct encountered
        # state = 2 => vars or func declaration encountered, skipping until ";"
        # state = 0 => normal (i.e. LN + spaces)
        state = 0
        depth = 0           # brace nesting depth, shared across blocks
        blocks2 = []
        for b in self.blocks:
            if b.isDirective():
                # directives pass through untouched
                blocks2.append(b)
            else:
                n = len(b.tokens)
                i = 0
                first = 0   # start of the current run of tokens to keep
                if state == 2:
                    # still skipping a declaration started in a previous block
                    first = n
                while i < n:
                    tok = b.tokens[i]
                    if state == 0:
                        bad = 0
                        if tok.id in [tokLN, tokSPACE]:
                            pass
                        elif tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]:
                            state = 1
                        else:
                            if tok.value in [ 'static', 'extern', '__KINLINE' ]:
                                # scan ahead for the declared identifier,
                                # up to the opening paren or semicolon
                                j = i+1
                                ident = ""
                                while j < n and not (b.tokens[j].id in [ '(', ';' ]):
                                    if b.tokens[j].id == tokIDENT:
                                        ident = b.tokens[j].value
                                    j += 1
                                if j < n and ident in knownStatics:
                                    # this is a known static, we're going to keep its
                                    # definition in the final output
                                    state = 1
                                else:
                                    #print "### skip static '%s'" % ident
                                    pass

                            if state == 0:
                                # flush tokens kept so far, then skip the
                                # declaration until its terminating ';'
                                if i > first:
                                    #print "### intermediate from '%s': '%s'" % (tok.value, repr(b.tokens[first:i]))
                                    blocks2.append( Block(b.tokens[first:i]) )
                                state = 2
                                first = n

                    else: # state > 0
                        if tok.id == '{':
                            depth += 1

                        elif tok.id == '}':
                            if depth > 0:
                                depth -= 1

                        elif depth == 0 and tok.id == ';':
                            # end of the current declaration/definition
                            if state == 2:
                                first = i+1
                            state = 0

                    i += 1

                if i > first:
                    #print "### final '%s'" % repr(b.tokens[first:i])
                    blocks2.append( Block(b.tokens[first:i]) )

        self.blocks = blocks2

    def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"):
        """Insert your standard issue disclaimer that this is an
        auto-generated file, etc..."""
        tokens = CppLineTokenizer( disclaimer ).toTokenList()
        tokens = tokens[:-1] # remove trailing tokLN
        self.blocks = [ Block(tokens) ] + self.blocks
-
class BlockParser:
    """A class used to convert an input source file into a BlockList object."""

    def __init__(self,tokzer=None):
        """Initialize a block parser. The input source is provided through a
        Tokenizer object."""
        self.reset(tokzer)

    def reset(self,tokzer):
        self.state = 1
        self.tokzer = tokzer

    def getBlocks(self,tokzer=None):
        """Tokenize and parse the input source, return a BlockList object.

        NOTE: empty and line-numbering directives are ignored and removed
              from the result. As a consequence, it is possible to have
              two successive text blocks in the result."""
        # state 0 => in source code
        # state 1 => in source code, after a LN
        # state 2 => in source code, after LN then some space
        state = 1
        lastLN = 0       # index just past the last tokLN in 'current'
        current = []     # accumulated text tokens
        blocks = []

        if tokzer == None:
            tokzer = self.tokzer

        while 1:
            tok = tokzer.getToken()
            if tok.id == tokEOF:
                break

            if tok.id == tokLN:
                state = 1
                current.append(tok)
                lastLN = len(current)

            elif tok.id == tokSPACE:
                if state == 1:
                    state = 2
                current.append(tok)

            elif tok.id == "#":
                # '#' only starts a directive at the beginning of a line
                # (possibly after whitespace), i.e. when state > 0
                if state > 0:
                    # this is the start of a directive

                    if lastLN > 0:
                        # record previous tokens as text block
                        block = Block(current[:lastLN])
                        blocks.append(block)
                        lastLN = 0

                    current = []

                    # skip spaces after the #
                    while 1:
                        tok = tokzer.getToken()
                        if tok.id != tokSPACE:
                            break

                    if tok.id != tokIDENT:
                        # empty or line-numbering, ignore it
                        if tok.id != tokLN and tok.id != tokEOF:
                            # consume the rest of the line
                            while 1:
                                tok = tokzer.getToken()
                                if tok.id == tokLN or tok.id == tokEOF:
                                    break
                        continue

                    directive = tok.value
                    lineno = tok.lineno

                    # skip spaces
                    tok = tokzer.getToken()
                    while tok.id == tokSPACE:
                        tok = tokzer.getToken()

                    # then record tokens until LN
                    dirtokens = []
                    while tok.id != tokLN and tok.id != tokEOF:
                        dirtokens.append(tok)
                        tok = tokzer.getToken()

                    block = Block(dirtokens,directive,lineno)
                    blocks.append(block)
                    state = 1

            else:
                state = 0
                current.append(tok)

        # flush any remaining text tokens as a final block
        if len(current) > 0:
            block = Block(current)
            blocks.append(block)

        return BlockList(blocks)

    def parse(self,tokzer):
        return self.getBlocks( tokzer )

    def parseLines(self,lines):
        """Parse a list of text lines into a BlockList object."""
        return self.getBlocks( CppLinesTokenizer(lines) )

    def parseFile(self,path):
        """Parse a file into a BlockList object."""
        file = open(path, "rt")
        result = self.getBlocks( CppFileTokenizer(file) )
        file.close()
        return result
-
-
def test_block_parsing(lines,expected):
    """Unit-test helper: parse 'lines' and check that the resulting blocks
    stringify to the 'expected' list; raise BadExpectedToken on mismatch."""
    blocks = BlockParser().parse( CppLinesTokenizer(lines) )
    if len(blocks) != len(expected):
        raise BadExpectedToken, "parser.buildBlocks returned '%s' expecting '%s'" \
              % (str(blocks), repr(expected))
    for n in range(len(blocks)):
        if str(blocks[n]) != expected[n]:
            raise BadExpectedToken, "parser.buildBlocks()[%d] is '%s', expecting '%s'" \
                  % (n, str(blocks[n]), expected[n])
    #for block in blocks:
    #    print block
-
def test_BlockParser():
    """Run the BlockParser unit tests (text blocks, empty directives,
    and #ifdef normalization with embedded comments)."""
    test_block_parsing(["#error hello"],["#error hello"])
    test_block_parsing([ "foo", "", "bar" ], [ "foo\n\nbar\n" ])
    # a lone '#' on a line is an empty directive and must be dropped
    test_block_parsing([ "foo", " # ", "bar" ], [ "foo\n","bar\n" ])
    test_block_parsing(\
        [ "foo", "  #  ", "  #  /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ],
        [ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] )
-
-
-#####################################################################################
-#####################################################################################
-##### #####
-##### B L O C K L I S T O P T I M I Z A T I O N #####
-##### #####
-#####################################################################################
-#####################################################################################
-
def remove_macro_defines( blocks, excludedMacros=set() ):
    """Return a new list of blocks with '#define <name> ...' directives
    removed when <name> appears in 'excludedMacros'.

    blocks         -- list of Block objects
    excludedMacros -- set of macro names to strip (never mutated)

    Idiom fixes: 'is None' instead of '== None', 'not in' instead of
    'not x in'; behavior is unchanged."""
    result = []
    for b in blocks:
        macroName = b.isDefine()
        # keep text blocks and non-define directives (isDefine() is None),
        # plus any define that is not explicitly excluded
        if macroName is None or macroName not in excludedMacros:
            result.append(b)

    return result
-
def find_matching_endif( blocks, i ):
    """Starting at index 'i', scan 'blocks' for the directive that closes
    the current conditional: the matching '#endif', or an '#else'/'#elif'
    found at the same nesting depth. Returns its index, or len(blocks)
    when the conditional is unterminated."""
    n = len(blocks)
    depth = 1
    pos = i
    while pos < n:
        if blocks[pos].isDirective():
            directive = blocks[pos].directive
            if directive in ( "if", "ifndef", "ifdef" ):
                # a nested conditional opens
                depth += 1
            elif directive in ( "else", "elif" ) and depth == 1:
                # alternate branch of the conditional we are scanning
                return pos
            elif directive == "endif":
                depth -= 1
                if depth == 0:
                    return pos
        pos += 1
    return pos
-
def optimize_if01( blocks ):
    """Remove the code between '#if 0' .. '#endif' in a list of CppBlocks,
    and inline the content of '#if 1' .. '#endif' regions (recursively).
    '#else'/'#elif' branches are rewritten in place so later passes see a
    consistent structure. Returns a new list of blocks; the input blocks
    may be mutated (directive/expr rewritten)."""
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        # copy everything up to the next #if-like directive verbatim
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno))
            result += blocks[i:j]
        if j >= n:
            break
        expr = blocks[j].expr
        r = expr.toInt()
        if r == None:
            # not a constant condition: keep the directive as-is
            result.append(blocks[j])
            i = j + 1
            continue

        if r == 0:
            # if 0 => skip everything until the corresponding #endif
            j = find_matching_endif( blocks, j+1 )
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir = blocks[j].directive
            if dir == "endif":
                D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[i].lineno, blocks[j].lineno))
                i = j + 1
            elif dir == "else":
                # convert 'else' into 'if 1'
                D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[i].lineno, blocks[j-1].lineno))
                blocks[j].directive = "if"
                blocks[j].expr = CppExpr( CppLineTokenizer("1").toTokenList() )
                i = j
            elif dir == "elif":
                # convert 'elif' into 'if'
                D2("convert 'if 0' .. 'elif' into 'if'")
                blocks[j].directive = "if"
                i = j
            continue

        # if 1 => find corresponding endif and remove/transform them
        k = find_matching_endif( blocks, j+1 )
        if k >= n:
            # unterminated #if 1, finish here
            D2("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        dir = blocks[k].directive
        if dir == "endif":
            # keep the (recursively optimized) body, drop the delimiters
            D2("convert 'if 1' .. 'endif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
            result += optimize_if01(blocks[j+1:k])
            i = k+1
        elif dir == "else":
            # convert 'else' into 'if 0'
            D2("convert 'if 1' .. 'else' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
            result += optimize_if01(blocks[j+1:k])
            blocks[k].directive = "if"
            blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() )
            i = k
        elif dir == "elif":
            # convert 'elif' into 'if 0'
            D2("convert 'if 1' .. 'elif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
            result += optimize_if01(blocks[j+1:k])
            blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() )
            i = k
    return result
-
def test_optimizeAll():
    """Unit test for BlockList.optimizeAll: constant #if 0/1 regions must
    be folded away, keeping only the GOOD_* defines."""
    text = """\
#if 1
#define  GOOD_1
#endif
#if 0
#define  BAD_2
#define  BAD_3
#endif

#if 1
#define  GOOD_2
#else
#define  BAD_4
#endif

#if 0
#define  BAD_5
#else
#define  GOOD_3
#endif

#if 0
#if 1
#define  BAD_6
#endif
#endif\
"""

    expected = """\
#define GOOD_1

#define GOOD_2

#define GOOD_3

"""

    print "running test_BlockList.optimizeAll"
    out = StringOutput()
    lines = string.split(text, '\n')
    # NOTE: 'list' shadows the builtin here; kept for byte-compatibility
    list = BlockParser().parse( CppLinesTokenizer(lines) )
    #D_setlevel(2)
    list.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} )
    #print repr(list)
    list.write(out)
    if out.get() != expected:
        print "KO: macro optimization failed\n"
        print "<<<< expecting '",
        print expected,
        print "'\n>>>> result '"
        print out.get(),
        print "'\n----"
-
-
-#####################################################################################
-#####################################################################################
-##### #####
-##### #####
-##### #####
-#####################################################################################
-#####################################################################################
-
def runUnitTests():
    """Run all unit tests for this program."""
    print "running unit tests"
    test_CppTokenizer()
    test_CppExpr()
    test_optimizeAll()
    test_BlockParser()
    print "OK"

# run the self-tests when executed directly (the module is normally imported)
if __name__ == "__main__":
    runUnitTests()