CS75 Spring 2011
Project 2b: A sample expression parser
from scanner import *
import sys
"""
CFG for expressions:
E -> -E | E = E | E - E | E + E | E * E | E / E | ( E ) | id | num
Converted into the following LL(1) grammar:
E0 -> E1 E0rest
E0rest -> = E0 | epsilon
E1 -> E2 E1rest
E1rest -> - E2 E1rest | + E2 E1rest | epsilon
E2 -> E3 E2rest
E2rest -> * E3 E2rest | / E3 E2rest | epsilon
E3 -> - E3 | E4
E4 -> ( E0 ) | id | num
"""
class ExpressionParser:
"""
Implements the LL(1) grammar above as a recursive descent parser.
"""
def __init__(self, filename, debug=False):
self.scanner = LexicalAnalyzer(filename)
self.debug = debug
self.token = None
self.value = None
def match(self, tokenType):
if tokenType == self.token:
if self.debug:
print "MATCH:", self.token,
if self.value == None:
print
else:
print self.value
self.token, self.value = self.scanner.getToken()
elif self.token == 'err':
self.emitError(self.value)
else:
self.emitError("expected %s, but found %s" % \
(tokenType, self.token))
def emitError(self, msg):
print "Error on line %d: %s" % (self.scanner.getLineNum(), msg)
print self.scanner.getFile().getLineText()
raise SyntaxError
def parse(self):
"""
Expects a sequence of expressions separated by semicolons.
"""
self.token, self.value = self.scanner.getToken()
while True:
try:
self.E0()
self.match('semi')
except SyntaxError:
print "Unrecoverable error, parser exiting"
sys.exit(-1)
if self.token == 'done':
break
print "Parse was successful"
def E0(self):
if self.debug:
print "E0"
self.E1()
self.E0rest()
def E0rest(self):
if self.debug:
print "E0rest"
if self.token == 'assign':
self.match('assign')
self.E0()
def E1(self):
if self.debug:
print "E1"
self.E2()
self.E1rest()
def E1rest(self):
if self.debug:
print "E1rest"
if self.token == 'sub':
self.match('sub')
self.E2()
self.E1rest()
elif self.token == 'add':
self.match('add')
self.E2()
self.E1rest()
def E2(self):
if self.debug:
print "E2"
self.E3()
self.E2rest()
def E2rest(self):
if self.debug:
print "E2rest"
if self.token == 'mul':
self.match('mul')
self.E3()
self.E2rest()
elif self.token == 'div':
self.match('div')
self.E3()
self.E2rest()
def E3(self):
if self.debug:
print "E3"
if self.token == 'sub':
self.match('sub')
self.E3()
else:
self.E4()
def E4(self):
if self.debug:
print "E4"
if self.token == 'lparen':
self.match('lparen')
self.E0()
self.match('rparen')
elif self.token == 'id':
self.match('id')
elif self.token == 'num':
self.match('num')
else:
self.emitError("invalid expression")
def main():
if len(sys.argv) == 1:
print "Error: must provide filename to parse"
return
p = ExpressionParser(sys.argv[1], True)
p.parse()
if __name__ == '__main__':
main()