-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathparser.py
More file actions
342 lines (263 loc) · 9.85 KB
/
parser.py
File metadata and controls
342 lines (263 loc) · 9.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
"""
Parser Module
=============
This module implements a recursive descent parser for mathematical expressions.
It takes tokens from the tokenizer and builds an Abstract Syntax Tree (AST).
The parser respects operator precedence:
- Parentheses: highest
- Multiplication and Division: medium
- Addition and Subtraction: lowest
Grammar (in EBNF notation):
expression := term ((PLUS | MINUS) term)*
term := factor ((MULTIPLY | DIVIDE) factor)*
factor := NUMBER | LPAREN expression RPAREN | (PLUS|MINUS) factor
"""
from tokenizer import Token, TokenType, Tokenizer
from typing import Optional, List
class ASTNode:
"""
Base class for all Abstract Syntax Tree nodes.
An AST represents the structure of the expression in a tree form,
which makes it easy to evaluate or transform.
"""
pass
class NumberNode(ASTNode):
"""
Represents a number in the AST.
Example: In "3 + 5", both 3 and 5 are NumberNodes.
"""
def __init__(self, value: float):
self.value = value
def __repr__(self):
return f"Number({self.value})"
class BinaryOpNode(ASTNode):
"""
Represents a binary operation (an operation with two operands).
Examples:
- "3 + 5" becomes BinaryOpNode(+, Number(3), Number(5))
- "2 * 4" becomes BinaryOpNode(*, Number(2), Number(4))
Attributes:
operator: The operator token (PLUS, MINUS, etc.)
left: The left operand (another ASTNode)
right: The right operand (another ASTNode)
"""
def __init__(self, operator: Token, left: ASTNode, right: ASTNode):
self.operator = operator
self.left = left
self.right = right
def __repr__(self):
return f"BinaryOp({self.operator.value}, {self.left}, {self.right})"
class UnaryOpNode(ASTNode):
"""
Represents a unary operation (an operation with one operand).
Example: "-5" becomes UnaryOpNode(-, Number(5))
Attributes:
operator: The operator token (PLUS or MINUS)
operand: The operand (another ASTNode)
"""
def __init__(self, operator: Token, operand: ASTNode):
self.operator = operator
self.operand = operand
def __repr__(self):
return f"UnaryOp({self.operator.value}, {self.operand})"
class Parser:
"""
Parses tokens into an Abstract Syntax Tree (AST).
This is a recursive descent parser, which means it uses recursive
functions that mirror the grammar rules.
Example:
>>> tokenizer = Tokenizer("3 + 5 * 2")
>>> tokens = tokenizer.tokenize()
>>> parser = Parser(tokens)
>>> ast = parser.parse()
>>> print(ast)
BinaryOp(+, Number(3.0), BinaryOp(*, Number(5.0), Number(2.0)))
"""
def __init__(self, tokens: List[Token]):
"""
Initialize the parser with a list of tokens.
Args:
tokens: List of Token objects from the tokenizer
"""
self.tokens = tokens
self.position = 0
self.current_token = self.tokens[0] if tokens else None
def error(self, message: str):
"""Raise a parsing error with context."""
raise ValueError(
f"Parser error at position {self.current_token.position}: {message}\n"
f"Current token: {self.current_token}"
)
def advance(self):
"""Move to the next token."""
self.position += 1
if self.position < len(self.tokens):
self.current_token = self.tokens[self.position]
def expect(self, token_type: TokenType):
"""
Verify that the current token matches the expected type.
Args:
token_type: The expected TokenType
Raises:
ValueError: If the current token doesn't match
"""
if self.current_token.type != token_type:
self.error(f"Expected {token_type.name}, got {self.current_token.type.name}")
def factor(self) -> ASTNode:
"""
Parse a factor (the highest precedence items).
Grammar: factor := NUMBER | LPAREN expression RPAREN | (PLUS|MINUS) factor
This handles:
- Numbers: 42, 3.14
- Parenthesized expressions: (3 + 5)
- Unary operators: -5, +3
Returns:
An ASTNode representing the factor
"""
token = self.current_token
# Unary plus or minus
if token.type in (TokenType.PLUS, TokenType.MINUS):
self.advance()
return UnaryOpNode(token, self.factor())
# Number
elif token.type == TokenType.NUMBER:
self.advance()
return NumberNode(token.value)
# Parenthesized expression
elif token.type == TokenType.LPAREN:
self.advance()
node = self.expression()
self.expect(TokenType.RPAREN)
self.advance()
return node
else:
self.error(f"Unexpected token in factor: {token.type.name}")
def term(self) -> ASTNode:
"""
Parse a term (multiplication and division).
Grammar: term := factor ((MULTIPLY | DIVIDE) factor)*
This handles expressions like:
- 3 * 5
- 10 / 2
- 2 * 3 * 4
Returns:
An ASTNode representing the term
"""
node = self.factor()
while self.current_token.type in (TokenType.MULTIPLY, TokenType.DIVIDE):
operator = self.current_token
self.advance()
node = BinaryOpNode(operator, node, self.factor())
return node
def expression(self) -> ASTNode:
"""
Parse an expression (addition and subtraction).
Grammar: expression := term ((PLUS | MINUS) term)*
This is the lowest precedence level, so it's called first.
It handles expressions like:
- 3 + 5
- 10 - 2
- 2 + 3 * 4 (correctly evaluates as 2 + (3 * 4))
Returns:
An ASTNode representing the expression
"""
node = self.term()
while self.current_token.type in (TokenType.PLUS, TokenType.MINUS):
operator = self.current_token
self.advance()
node = BinaryOpNode(operator, node, self.term())
return node
def parse(self) -> ASTNode:
"""
Parse the tokens and return the root of the AST.
This is the main entry point for parsing.
Returns:
The root ASTNode of the parsed expression
Raises:
ValueError: If there's a syntax error
"""
ast = self.expression()
# Make sure we consumed all tokens (except EOF)
if self.current_token.type != TokenType.EOF:
self.error(f"Unexpected token after expression: {self.current_token.type.name}")
return ast
class Interpreter:
"""
Evaluates an AST and returns the result.
This is also called the "semantic analysis" phase.
It walks the tree and computes the actual values.
Example:
>>> tokenizer = Tokenizer("3 + 5")
>>> tokens = tokenizer.tokenize()
>>> parser = Parser(tokens)
>>> ast = parser.parse()
>>> interpreter = Interpreter()
>>> result = interpreter.evaluate(ast)
>>> print(result)
8.0
"""
def evaluate(self, node: ASTNode) -> float:
"""
Recursively evaluate an AST node.
Args:
node: The ASTNode to evaluate
Returns:
The numeric result
Raises:
ValueError: If there's a runtime error (like division by zero)
"""
if isinstance(node, NumberNode):
return node.value
elif isinstance(node, UnaryOpNode):
operand = self.evaluate(node.operand)
if node.operator.type == TokenType.PLUS:
return +operand
elif node.operator.type == TokenType.MINUS:
return -operand
else:
raise ValueError(f"Unknown unary operator: {node.operator.type}")
elif isinstance(node, BinaryOpNode):
left = self.evaluate(node.left)
right = self.evaluate(node.right)
if node.operator.type == TokenType.PLUS:
return left + right
elif node.operator.type == TokenType.MINUS:
return left - right
elif node.operator.type == TokenType.MULTIPLY:
return left * right
elif node.operator.type == TokenType.DIVIDE:
if right == 0:
raise ValueError("Division by zero")
return left / right
else:
raise ValueError(f"Unknown binary operator: {node.operator.type}")
raise ValueError(f"Unknown node type: {type(node)}")
# Example usage
if __name__ == "__main__":
examples = [
"3 + 5",
"10 - 2 * 3",
"(10 - 2) * 3",
"2 + 3 * 4",
"-5 + 10",
"100 / (2 + 3) * 2"
]
interpreter = Interpreter()
print("Mini Parser Examples")
print("=" * 50)
for expr in examples:
try:
# Tokenize
tokenizer = Tokenizer(expr)
tokens = tokenizer.tokenize()
# Parse
parser = Parser(tokens)
ast = parser.parse()
# Evaluate
result = interpreter.evaluate(ast)
print(f"\nExpression: {expr}")
print(f"AST: {ast}")
print(f"Result: {result}")
except Exception as e:
print(f"\nExpression: {expr}")
print(f"Error: {e}")