"""The parser functionality. Parses a string and returns S-expression tokens."""
from typing import Literal, TypedDict, Union
from pylisp.environment import Expr, Symbol
TokenType = Literal[
"LEFT_PAREN", "RIGHT_PAREN", "NUMBER", "SYMBOL", "KEYWORD", "BOOLEAN", "STRING"
]
KEYWORDS = ("lambda", "if", "quote", "define", "let")
class Token(TypedDict):
    """A token represents a piece of syntax."""
token_type: TokenType
payload: None | str | int | bool
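# Illustration (hypothetical constant, not used elsewhere in this module):
# at runtime a Token is a plain dict, e.g. the integer 42 is represented as
# the value below.
_EXAMPLE_NUMBER_TOKEN: Token = Token(token_type="NUMBER", payload=42)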
def tokenize(inp: str) -> list[Token]:
    """Read a string and produce a list of tokens."""
curr = inp
tokens: list[Token] = []
while len(curr) > 0:
curr = slurp_whitespace(curr)
if len(curr) == 0:
break
token, curr = slurp_token(curr)
tokens.append(token)
return tokens
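# Example (sketch): tokenize("(+ 1 2)") yields five tokens in order:
# LEFT_PAREN, SYMBOL "+", NUMBER 1, NUMBER 2, RIGHT_PAREN.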
def find_last_non_whitespace(inp: str) -> int:
    """Return the index one past the end of the leading word in the input.

    The word ends at the first whitespace character or parenthesis.
    """
    i = 0
    for char in inp:
        if char.isspace() or char in ("(", ")"):
            break
        i += 1
    return i
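# Example: find_last_non_whitespace("foo bar") == 3 and
# find_last_non_whitespace("foo)") == 3; the leading word is inp[:3].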
def slurp_integer(inp: str) -> tuple[Token, str]:
    """Try to slurp an integer."""
curr_ind = 0
while curr_ind < len(inp) and inp[curr_ind].isdigit():
curr_ind += 1
return (Token(token_type="NUMBER", payload=int(inp[:curr_ind])), inp[curr_ind:])
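# Example: slurp_integer("123)") returns a NUMBER token with payload 123
# and the remaining input ")". Only a leading run of digits is consumed,
# so negative numbers and floats are not recognised by this function.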
def slurp_string(inp: str) -> tuple[Token, str]:
    """Slurp a string literal enclosed in double quotes."""
    curr_ind = 1
    # Find the end of the string, i.e. look for the closing '"'.
    while curr_ind < len(inp) and inp[curr_ind] != '"':
        curr_ind += 1
    if curr_ind == len(inp):
        raise RuntimeError("No closing delimiter for string.")
return (
Token(token_type="STRING", payload=inp[1:curr_ind]),
inp[curr_ind + 1 :],
)
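# Example: slurp_string('"hi" 1') returns a STRING token with payload "hi"
# and the remaining input " 1". There is no escape handling; the next '"'
# always terminates the string.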
def slurp_token(inp: str) -> tuple[Token, str]:
    """Eat a single token and return the token and the rest of the input string."""
first_letter = inp[0]
if first_letter == "(":
return (Token(token_type="LEFT_PAREN", payload=None), inp[1:])
if first_letter == ")":
return (Token(token_type="RIGHT_PAREN", payload=None), inp[1:])
if inp[0].isdigit():
return slurp_integer(inp)
if inp[0] == '"':
return slurp_string(inp)
if inp[0] == "#" and len(inp) >= 2:
if (false_or_true := inp[1]) in ("f", "t"):
value = false_or_true == "t"
return (Token(token_type="BOOLEAN", payload=value), inp[2:])
last_char_word = find_last_non_whitespace(inp)
the_word = inp[0:last_char_word]
if the_word in KEYWORDS:
return (Token(token_type="KEYWORD", payload=the_word), inp[last_char_word:])
return (
Token(token_type="SYMBOL", payload=the_word),
inp[last_char_word:],
)
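# Dispatch summary: "(" and ")" map to paren tokens, a leading digit starts
# a NUMBER, '"' starts a STRING, "#t"/"#f" become BOOLEANs, and any other
# word becomes a KEYWORD (if listed in KEYWORDS) or a SYMBOL.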
def slurp_whitespace(inp: str) -> str:
    """Remove leading whitespace from the input and return the rest."""
    i = 0
    while i < len(inp) and inp[i].isspace():
        i += 1
    return inp[i:]
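# Example: slurp_whitespace("  foo") == "foo"; an all-whitespace input
# yields "".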
Value = str | int | bool | Symbol
ValueExp = Union[list[Union["ValueExp", Value]], Value]
def parse_string(inp: str) -> Expr:
    """Parse an s-expression string into a nested expression tree for the evaluator."""
tokens = tokenize(inp)
stack: list[list[Expr]] = [[]]
current_token = 0
    while current_token < len(tokens):
        token = tokens[current_token]
        if token["token_type"] == "LEFT_PAREN":
            # An opening paren starts a new, nested expression list.
            stack.append([])
        elif token["token_type"] == "RIGHT_PAREN":
            # A closing paren finishes the current list; attach it to its parent.
            top = stack.pop()
            if len(stack) == 0:
                raise RuntimeError("Unbalanced right parenthesis.")
            stack[-1].append(top)
        else:
            # Atoms: symbols become Symbol objects, other tokens keep their payload.
            value: str | int | bool | Symbol
            if token["token_type"] == "SYMBOL":
                value = Symbol(name=str(token["payload"]))
            else:
                payload = token["payload"]
                assert payload is not None
                value = payload
            stack[-1].append(value)
        current_token += 1
if len(stack) == 0 or len(stack[0]) == 0:
raise RuntimeError("Unbalanced parenthesis.")
return stack[0][0]
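if __name__ == "__main__":
    # Minimal usage sketch (not part of the library API): tokenize and parse
    # a couple of small expressions. The exact repr of Symbol values depends
    # on pylisp.environment.
    for example_token in tokenize("(+ 1 2)"):
        print(example_token)
    print(parse_string("(define x 5)"))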