EPOCH4.0-MiniParser/parser.py at main · github-community-gitam/EPOCH4.0-MiniParser · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
"""
Parser Module
=============

This module implements a recursive descent parser for mathematical expressions.
It takes tokens from the tokenizer and builds an Abstract Syntax Tree (AST).

The parser respects operator precedence:
  - Parentheses: highest
  - Multiplication and Division: medium
  - Addition and Subtraction: lowest

Grammar (in EBNF notation):
    expression  := term ((PLUS | MINUS) term)*
    term        := factor ((MULTIPLY | DIVIDE) factor)*
    factor      := NUMBER | LPAREN expression RPAREN | (PLUS|MINUS) factor
"""

from tokenizer import Token, TokenType, Tokenizer
from typing import Optional, List


class ASTNode:
    """
    Base class for all Abstract Syntax Tree nodes.

    An AST represents the structure of the expression in a tree form,
    which makes it easy to evaluate or transform.
    """
    pass


class NumberNode(ASTNode):
    """
    Represents a number in the AST.

    Example: In "3 + 5", both 3 and 5 are NumberNodes.
    """
    def __init__(self, value: float):
        self.value = value

    def __repr__(self):
        return f"Number({self.value})"


class BinaryOpNode(ASTNode):
    """
    Represents a binary operation (an operation with two operands).

    Examples:
      - "3 + 5" becomes BinaryOpNode(+, Number(3), Number(5))
      - "2 * 4" becomes BinaryOpNode(*, Number(2), Number(4))

    Attributes:
        operator: The operator token (PLUS, MINUS, etc.)
        left: The left operand (another ASTNode)
        right: The right operand (another ASTNode)
    """
    def __init__(self, operator: Token, left: ASTNode, right: ASTNode):
        self.operator = operator
        self.left = left
        self.right = right

    def __repr__(self):
        return f"BinaryOp({self.operator.value}, {self.left}, {self.right})"


class UnaryOpNode(ASTNode):
    """
    Represents a unary operation (an operation with one operand).

    Example: "-5" becomes UnaryOpNode(-, Number(5))

    Attributes:
        operator: The operator token (PLUS or MINUS)
        operand: The operand (another ASTNode)
    """
    def __init__(self, operator: Token, operand: ASTNode):
        self.operator = operator
        self.operand = operand

    def __repr__(self):
        return f"UnaryOp({self.operator.value}, {self.operand})"


class Parser:
    """
    Parses tokens into an Abstract Syntax Tree (AST).

    This is a recursive descent parser, which means it uses recursive
    functions that mirror the grammar rules.

    Example:
        >>> tokenizer = Tokenizer("3 + 5 * 2")
        >>> tokens = tokenizer.tokenize()
        >>> parser = Parser(tokens)
        >>> ast = parser.parse()
        >>> print(ast)
        BinaryOp(+, Number(3.0), BinaryOp(*, Number(5.0), Number(2.0)))
    """

    def __init__(self, tokens: List[Token]):
        """
        Initialize the parser with a list of tokens.

        Args:
            tokens: List of Token objects from the tokenizer
        """
        self.tokens = tokens
        self.position = 0
        self.current_token = self.tokens[0] if tokens else None

    def error(self, message: str):
        """Raise a parsing error with context."""
        raise ValueError(
            f"Parser error at position {self.current_token.position}: {message}\n"
            f"Current token: {self.current_token}"
        )

    def advance(self):
        """Move to the next token."""
        self.position += 1
        if self.position < len(self.tokens):
            self.current_token = self.tokens[self.position]

    def expect(self, token_type: TokenType):
        """
        Verify that the current token matches the expected type.

        Args:
            token_type: The expected TokenType

        Raises:
            ValueError: If the current token doesn't match
        """
        if self.current_token.type != token_type:
            self.error(f"Expected {token_type.name}, got {self.current_token.type.name}")

    def factor(self) -> ASTNode:
        """
        Parse a factor (the highest precedence items).

        Grammar: factor := NUMBER | LPAREN expression RPAREN | (PLUS|MINUS) factor

        This handles:
          - Numbers: 42, 3.14
          - Parenthesized expressions: (3 + 5)
          - Unary operators: -5, +3

        Returns:
            An ASTNode representing the factor
        """
        token = self.current_token

        # Unary plus or minus
        if token.type in (TokenType.PLUS, TokenType.MINUS):
            self.advance()
            return UnaryOpNode(token, self.factor())

        # Number
        elif token.type == TokenType.NUMBER:
            self.advance()
            return NumberNode(token.value)

        # Parenthesized expression
        elif token.type == TokenType.LPAREN:
            self.advance()
            node = self.expression()
            self.expect(TokenType.RPAREN)
            self.advance()
            return node

        else:
            self.error(f"Unexpected token in factor: {token.type.name}")

    def term(self) -> ASTNode:
        """
        Parse a term (multiplication and division).

        Grammar: term := factor ((MULTIPLY | DIVIDE) factor)*

        This handles expressions like:
          - 3 * 5
          - 10 / 2
          - 2 * 3 * 4

        Returns:
            An ASTNode representing the term
        """
        node = self.factor()

        while self.current_token.type in (TokenType.MULTIPLY, TokenType.DIVIDE):
            operator = self.current_token
            self.advance()
            node = BinaryOpNode(operator, node, self.factor())

        return node

    def expression(self) -> ASTNode:
        """
        Parse an expression (addition and subtraction).

        Grammar: expression := term ((PLUS | MINUS) term)*

        This is the lowest precedence level, so it's called first.
        It handles expressions like:
          - 3 + 5
          - 10 - 2
          - 2 + 3 * 4 (correctly evaluates as 2 + (3 * 4))

        Returns:
            An ASTNode representing the expression
        """
        node = self.term()

        while self.current_token.type in (TokenType.PLUS, TokenType.MINUS):
            operator = self.current_token
            self.advance()
            node = BinaryOpNode(operator, node, self.term())

        return node

    def parse(self) -> ASTNode:
        """
        Parse the tokens and return the root of the AST.

        This is the main entry point for parsing.

        Returns:
            The root ASTNode of the parsed expression

        Raises:
            ValueError: If there's a syntax error
        """
        ast = self.expression()

        # Make sure we consumed all tokens (except EOF)
        if self.current_token.type != TokenType.EOF:
            self.error(f"Unexpected token after expression: {self.current_token.type.name}")

        return ast


class Interpreter:
    """
    Evaluates an AST and returns the result.

    This is also called the "semantic analysis" phase.
    It walks the tree and computes the actual values.

    Example:
        >>> tokenizer = Tokenizer("3 + 5")
        >>> tokens = tokenizer.tokenize()
        >>> parser = Parser(tokens)
        >>> ast = parser.parse()
        >>> interpreter = Interpreter()
        >>> result = interpreter.evaluate(ast)
        >>> print(result)
        8.0
    """

    def evaluate(self, node: ASTNode) -> float:
        """
        Recursively evaluate an AST node.

        Args:
            node: The ASTNode to evaluate

        Returns:
            The numeric result

        Raises:
            ValueError: If there's a runtime error (like division by zero)
        """
        if isinstance(node, NumberNode):
            return node.value

        elif isinstance(node, UnaryOpNode):
            operand = self.evaluate(node.operand)
            if node.operator.type == TokenType.PLUS:
                return +operand
            elif node.operator.type == TokenType.MINUS:
                return -operand
            else:
                raise ValueError(f"Unknown unary operator: {node.operator.type}")

        elif isinstance(node, BinaryOpNode):
            left = self.evaluate(node.left)
            right = self.evaluate(node.right)

            if node.operator.type == TokenType.PLUS:
                return left + right
            elif node.operator.type == TokenType.MINUS:
                return left - right
            elif node.operator.type == TokenType.MULTIPLY:
                return left * right
            elif node.operator.type == TokenType.DIVIDE:
                if right == 0:
                    raise ValueError("Division by zero")
                return left / right
            else:
                raise ValueError(f"Unknown binary operator: {node.operator.type}")

        raise ValueError(f"Unknown node type: {type(node)}")


# Example usage
if __name__ == "__main__":
    examples = [
        "3 + 5",
        "10 - 2 * 3",
        "(10 - 2) * 3",
        "2 + 3 * 4",
        "-5 + 10",
        "100 / (2 + 3) * 2"
    ]

    interpreter = Interpreter()

    print("Mini Parser Examples")
    print("=" * 50)

    for expr in examples:
        try:
            # Tokenize
            tokenizer = Tokenizer(expr)
            tokens = tokenizer.tokenize()

            # Parse
            parser = Parser(tokens)
            ast = parser.parse()

            # Evaluate
            result = interpreter.evaluate(ast)

            print(f"\nExpression: {expr}")
            print(f"AST: {ast}")
            print(f"Result: {result}")

        except Exception as e:
            print(f"\nExpression: {expr}")
            print(f"Error: {e}")