CS75 Spring 2011
Project 2b: A sample expression parser


from scanner import *
import sys

"""
CFG for expressions:

E -> -E | E = E | E - E | E + E | E * E | E / E | ( E ) | id | num

Converted into the following  LL(1) grammar:

E0     -> E1 E0rest
E0rest -> = E0 | epsilon
E1     -> E2 E1rest
E1rest -> - E2 E1rest | + E2 E1rest | epsilon
E2     -> E3 E2rest
E2rest -> * E3 E2rest | / E3 E2rest | epsilon
E3     -> - E3 | E4
E4     -> ( E0 ) | id | num
"""

class ExpressionParser:
    """
    Implements the LL(1) grammar above as a recursive descent parser.
    """
    def __init__(self, filename, debug=False):
        self.scanner = LexicalAnalyzer(filename)
        self.debug = debug
        self.token = None
        self.value = None

    def match(self, tokenType):
        if tokenType == self.token:
            if self.debug:
                print "MATCH:", self.token,
                if self.value == None:
                    print
                else:
                    print self.value
            self.token, self.value = self.scanner.getToken()
        elif self.token == 'err':
            self.emitError(self.value)
        else:
            self.emitError("expected %s, but found %s" % \
                             (tokenType, self.token))

    def emitError(self, msg):
        print "Error on line %d: %s" % (self.scanner.getLineNum(), msg)
        print self.scanner.getFile().getLineText()
        raise SyntaxError

    def parse(self):
        """
        Expects a sequence of expressions separated by semicolons.
        """
        self.token, self.value = self.scanner.getToken()
        while True:
            try:
                self.E0()
                self.match('semi')
            except SyntaxError:
                print "Unrecoverable error, parser exiting"
                sys.exit(-1)
            if self.token == 'done':
                break
        print "Parse was successful"

    def E0(self):
        if self.debug:
            print "E0"
        self.E1()
        self.E0rest()

    def E0rest(self):
        if self.debug:
            print "E0rest"
        if self.token == 'assign':
            self.match('assign')
            self.E0()
            
    def E1(self):
        if self.debug:
            print "E1"
        self.E2()
        self.E1rest()

    def E1rest(self):
        if self.debug:
            print "E1rest"
        if self.token == 'sub':
            self.match('sub')
            self.E2()
            self.E1rest()
        elif self.token == 'add':
            self.match('add')
            self.E2()
            self.E1rest()
            
    def E2(self):
        if self.debug:
            print "E2"
        self.E3()
        self.E2rest()
        
    def E2rest(self):
        if self.debug:
            print "E2rest"
        if self.token == 'mul':
            self.match('mul')
            self.E3()
            self.E2rest()
        elif self.token == 'div':
            self.match('div')
            self.E3()
            self.E2rest()
            
    def E3(self):
        if self.debug:
            print "E3"
        if self.token == 'sub':
            self.match('sub')
            self.E3()
        else:
            self.E4()

    def E4(self):
        if self.debug:
            print "E4"
        if self.token == 'lparen':
            self.match('lparen')
            self.E0()
            self.match('rparen')
        elif self.token == 'id':
            self.match('id')
        elif self.token == 'num':
            self.match('num')
        else:
            self.emitError("invalid expression")

def main():
    if len(sys.argv) == 1:
        print "Error: must provide filename to parse"
        return
    p = ExpressionParser(sys.argv[1], True)
    p.parse()

if __name__ == '__main__':
    main()