Refactor parser & interpreter structure

- Changed Interpreter & Parser to be traits and the original
  implementations to be concrete impls for the traits
This commit is contained in:
Daniel M 2021-12-29 15:17:01 +01:00
parent 40ea6c1f8e
commit 110bacbf6d
5 changed files with 709 additions and 671 deletions

View File

@ -1,109 +1,13 @@
use std::collections::HashMap;
mod tree_walk_interpreter;
use crate::{ast::{Statement, Expr, Ast, BinOpType, UnOpType}, token::Literal};
pub use tree_walk_interpreter::TWInterpreter;
/// Interpreter state: the program to execute and the variables defined so far.
pub struct Interpreter {
// Statements of the program; `run` executes them in order
prog: Vec<Statement>,
use crate::ast::Ast;
// TODO: Those variables are global only, so this will have to change with functions. Also Literal is reused as variable type
variables: HashMap<String, Literal>,
// Print expression statements to stdout
debug: bool,
}
impl Interpreter {
pub fn new(prog: Ast) -> Self {
let variables = Default::default();
Self { prog: prog.prog, variables, debug: true }
}
/// Run the program by executing every statement once, from the first to the last.
pub fn run(&mut self) {
    let stmt_count = self.prog.len();
    (0..stmt_count).for_each(|idx| self.execute_stmt(idx));
}
/// Execute the statement at position `idx` of the program.
///
/// - Expression statements are evaluated and, if `debug` is set, their result is printed.
/// - Let bindings evaluate the right hand side and (re)define the variable.
/// - Assignments evaluate the right hand side and overwrite an already declared variable.
///
/// # Panics
/// Panics if `idx` is out of bounds, if an assignment targets a variable that has not been
/// declared, or if evaluating the contained expression panics. Function definitions, if and
/// while statements are not implemented yet.
pub fn execute_stmt(&mut self, idx: usize) {
    // TODO: The clone here is not optimal
    match self.prog[idx].clone() {
        Statement::Expr(expr) => {
            // `expr` is already owned (it stems from the cloned statement), so it is moved
            // into the evaluation instead of being cloned a second time
            let expr_result = self.execute_expr(expr);
            if self.debug {
                println!("{:?}", expr_result);
            }
        }
        Statement::LetBinding(var_name, expr) => {
            let rhs = self.execute_expr(expr);
            self.variables.insert(var_name, rhs);
        }
        Statement::Assignment(var_name, expr) => {
            let rhs = self.execute_expr(expr);
            *self
                .variables
                .get_mut(&var_name)
                .expect("Assigning variable before declaration") = rhs;
        }
        Statement::FnDef(_) => todo!(),
        Statement::IfStmt(_) => todo!(),
        Statement::WhileStmt(_) => todo!(),
    }
}
/// Evaluate an expression to a value.
///
/// Literals evaluate to themselves and variables to a copy of their current value. Binary
/// operators are only implemented for the arithmetic operators on `Int64` operands, unary
/// negation only for `Int64`.
///
/// # Panics
/// Panics if a variable is used before its declaration, if a binary operator is applied to a
/// non `Int64` operand, if a bool or string is negated, on integer overflow and on division
/// or modulo by zero. Function calls, comparison / logic operators and `Not` are not
/// implemented yet.
fn execute_expr(&mut self, expr: Expr) -> Literal {
    match expr {
        Expr::Literal(lit) => lit,
        Expr::Variable(name) => self
            .variables
            .get(&name)
            .expect("Using variable before declaration")
            .clone(),
        Expr::FnCall(_) => todo!(),
        Expr::BinOp(bot, lhs, rhs) => {
            let lhs = match self.execute_expr(*lhs) {
                Literal::Int64(val) => val,
                _ => panic!("Binary operators for non i64 not yet implemented"),
            };
            let rhs = match self.execute_expr(*rhs) {
                Literal::Int64(val) => val,
                _ => panic!("Binary operators for non i64 not yet implemented"),
            };
            // Checked arithmetic: the plain operators would silently wrap around in release
            // builds, so overflow is reported the same way in every build profile
            let res = match bot {
                BinOpType::Add => lhs.checked_add(rhs).expect("Integer overflow in addition"),
                BinOpType::Sub => lhs
                    .checked_sub(rhs)
                    .expect("Integer overflow in subtraction"),
                BinOpType::Mul => lhs
                    .checked_mul(rhs)
                    .expect("Integer overflow in multiplication"),
                BinOpType::Div => lhs
                    .checked_div(rhs)
                    .expect("Division by zero or integer overflow in division"),
                BinOpType::Mod => lhs
                    .checked_rem(rhs)
                    .expect("Modulo by zero or integer overflow in modulo"),
                BinOpType::Eq => todo!(),
                BinOpType::Neq => todo!(),
                BinOpType::Gt => todo!(),
                BinOpType::Lt => todo!(),
                BinOpType::Ge => todo!(),
                BinOpType::Le => todo!(),
                BinOpType::And => todo!(),
                BinOpType::Or => todo!(),
                BinOpType::Xor => todo!(),
            };
            Literal::Int64(res)
        }
        Expr::UnOp(uot, expr) => match uot {
            UnOpType::Neg => {
                let mut res = self.execute_expr(*expr);
                match &mut res {
                    Literal::Boolean(_) => panic!("Can't negate bool"),
                    // Negating the smallest i64 overflows, so this is checked as well
                    Literal::Int64(val) => {
                        *val = val.checked_neg().expect("Integer overflow in negation")
                    }
                    Literal::String(_) => panic!("Can't negate string"),
                };
                res
            }
            UnOpType::Not => todo!(),
        },
    }
}
/// Common interface of the interpreter implementations: an interpreter is created from a parsed
/// program and can then run it.
pub trait Interpreter {
/// Initialize the interpreter with the given program
fn new(prog: Ast) -> Self;
/// Run the program
fn run(&mut self);
}

View File

@ -0,0 +1,115 @@
use std::collections::HashMap;
use crate::{ast::{Statement, Expr, Ast, BinOpType, UnOpType}, token::Literal};
use super::Interpreter;
/// Tree-Walk-Interpreter
///
/// Holds the program to execute together with the variables defined so far.
pub struct TWInterpreter {
// Statements of the program; `run` executes them in order
prog: Vec<Statement>,
// TODO: Those variables are global only, so this will have to change with functions. Also Literal is reused as variable type
variables: HashMap<String, Literal>,
// Print expression statements to stdout
debug: bool,
}
impl Interpreter for TWInterpreter {
fn new(prog: Ast) -> Self {
let variables = Default::default();
Self { prog: prog.prog, variables, debug: true }
}
fn run(&mut self) {
for idx in 0..self.prog.len() {
self.execute_stmt(idx);
}
}
}
impl TWInterpreter {
/// Execute the statement at position `idx` of the program.
///
/// - Expression statements are evaluated and, if `debug` is set, their result is printed.
/// - Let bindings evaluate the right hand side and (re)define the variable.
/// - Assignments evaluate the right hand side and overwrite an already declared variable.
///
/// # Panics
/// Panics if `idx` is out of bounds, if an assignment targets a variable that has not been
/// declared, or if evaluating the contained expression panics. Function definitions, if and
/// while statements are not implemented yet.
pub fn execute_stmt(&mut self, idx: usize) {
    // TODO: The clone here is not optimal
    match self.prog[idx].clone() {
        Statement::Expr(expr) => {
            // `expr` is already owned (it stems from the cloned statement), so it is moved
            // into the evaluation instead of being cloned a second time
            let expr_result = self.execute_expr(expr);
            if self.debug {
                println!("{:?}", expr_result);
            }
        }
        Statement::LetBinding(var_name, expr) => {
            let rhs = self.execute_expr(expr);
            self.variables.insert(var_name, rhs);
        }
        Statement::Assignment(var_name, expr) => {
            let rhs = self.execute_expr(expr);
            *self
                .variables
                .get_mut(&var_name)
                .expect("Assigning variable before declaration") = rhs;
        }
        Statement::FnDef(_) => todo!(),
        Statement::IfStmt(_) => todo!(),
        Statement::WhileStmt(_) => todo!(),
    }
}
/// Evaluate an expression to a value.
///
/// Literals evaluate to themselves and variables to a copy of their current value. Binary
/// operators are only implemented for the arithmetic operators on `Int64` operands, unary
/// negation only for `Int64`.
///
/// # Panics
/// Panics if a variable is used before its declaration, if a binary operator is applied to a
/// non `Int64` operand, if a bool or string is negated, on integer overflow and on division
/// or modulo by zero. Function calls, comparison / logic operators and `Not` are not
/// implemented yet.
fn execute_expr(&mut self, expr: Expr) -> Literal {
    match expr {
        Expr::Literal(lit) => lit,
        Expr::Variable(name) => self
            .variables
            .get(&name)
            .expect("Using variable before declaration")
            .clone(),
        Expr::FnCall(_) => todo!(),
        Expr::BinOp(bot, lhs, rhs) => {
            let lhs = match self.execute_expr(*lhs) {
                Literal::Int64(val) => val,
                _ => panic!("Binary operators for non i64 not yet implemented"),
            };
            let rhs = match self.execute_expr(*rhs) {
                Literal::Int64(val) => val,
                _ => panic!("Binary operators for non i64 not yet implemented"),
            };
            // Checked arithmetic: the plain operators would silently wrap around in release
            // builds, so overflow is reported the same way in every build profile
            let res = match bot {
                BinOpType::Add => lhs.checked_add(rhs).expect("Integer overflow in addition"),
                BinOpType::Sub => lhs
                    .checked_sub(rhs)
                    .expect("Integer overflow in subtraction"),
                BinOpType::Mul => lhs
                    .checked_mul(rhs)
                    .expect("Integer overflow in multiplication"),
                BinOpType::Div => lhs
                    .checked_div(rhs)
                    .expect("Division by zero or integer overflow in division"),
                BinOpType::Mod => lhs
                    .checked_rem(rhs)
                    .expect("Modulo by zero or integer overflow in modulo"),
                BinOpType::Eq => todo!(),
                BinOpType::Neq => todo!(),
                BinOpType::Gt => todo!(),
                BinOpType::Lt => todo!(),
                BinOpType::Ge => todo!(),
                BinOpType::Le => todo!(),
                BinOpType::And => todo!(),
                BinOpType::Or => todo!(),
                BinOpType::Xor => todo!(),
            };
            Literal::Int64(res)
        }
        Expr::UnOp(uot, expr) => match uot {
            UnOpType::Neg => {
                let mut res = self.execute_expr(*expr);
                match &mut res {
                    Literal::Boolean(_) => panic!("Can't negate bool"),
                    // Negating the smallest i64 overflows, so this is checked as well
                    Literal::Int64(val) => {
                        *val = val.checked_neg().expect("Integer overflow in negation")
                    }
                    Literal::String(_) => panic!("Can't negate string"),
                };
                res
            }
            UnOpType::Not => todo!(),
        },
    }
}
}

View File

@ -5,5 +5,5 @@ pub mod parser;
pub mod interpreter;
pub use lexer::Lexer;
pub use parser::Parser;
pub use interpreter::Interpreter;
pub use parser::{Parser, CustomParser};
pub use interpreter::TWInterpreter;

View File

@ -1,572 +1,16 @@
use crate::{
ast::{Ast, BinOpType, Expr, FnCall, Statement, UnOpType},
token::{Group, Keyword, Op, Token, TokenStream},
};
use crate::{ast::Ast, token::TokenStream};
mod custom_parser;
pub use custom_parser::CustomParser;
/// Error type of the parser results (`PRes`). It does not carry any information about the
/// failure yet.
///
/// TODO: Real parsing errors instead of panics in the Parser
#[derive(Debug)]
pub struct ParseErr;
type PRes<T> = Result<T, ParseErr>;
pub(crate) type PRes<T> = Result<T, ParseErr>;
/// The Parser contains a TokenStream to be parsed into an Ast (abstract syntax tree).
///
/// ## Grammar
/// ### Statements
/// `stmt_let = "let" IDENT "=" expr` \
/// `stmt_assign = IDENT "=" expr` \
/// `stmt_fn_def = "fn" IDENT "(" IDENT? ( "," IDENT)* ")" "{" stmt* "}"` \
/// `stmt_if = "if" expr "{" stmt* "}" ( "else" "{" stmt* "}" )?` \
/// `stmt_while = "while" expr "{" stmt* "}"` \
/// `stmt = ( stmt_let | stmt_assign | expr | stmt_fn_def | stmt_if | stmt_while ) ";"`
///
/// NOTE: `parse_statement` currently only handles `stmt_let`, `stmt_assign` and `expr`.
///
/// ### Expressions
/// `expr_literal = LITERAL` \
/// `expr_fn_call = IDENT "(" expr? ( "," expr )* ")"` \
/// `expr_variable = IDENT` \
/// `expr_value = expr_literal | expr_fn_call | expr_variable` \
/// `expr_term = "-" expr_term | "!" expr_term | "(" expr ")" | expr_value` \
/// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*` \
/// `expr_add = expr_mul (("+"|"-") expr_mul)*` \
/// `expr_rel = expr_add ((">"|"<"|">="|"<=") expr_add)*` \
/// `expr_equal = expr_rel (("=="|"!=") expr_rel)*` \
/// `expr_and = expr_equal ("&&" expr_equal)*` \
/// `expr_xor = expr_and ("^^" expr_and)*` \
/// `expr_or = expr_xor ("||" expr_xor)*` \
/// `expr = expr_or`
pub struct Parser {
tokens: TokenStream,
}
impl Parser {
/// Create a new parser from a TokenStream. The parser takes ownership of the stream and uses
/// it as the source of all tokens it parses.
pub fn new(tokens: TokenStream) -> Self {
Self { tokens }
}
/// Get the current token without consuming it.
///
/// Delegates to the underlying TokenStream. `None` presumably means that the end of the
/// stream has been reached (`parse` stops on `None`) - confirm against `TokenStream::curr`.
pub fn curr(&self) -> Option<&Token> {
self.tokens.curr()
}
/// Get the next token (the one after the current token) without consuming it.
///
/// Delegates to the underlying TokenStream.
pub fn peek(&self) -> Option<&Token> {
self.tokens.peek()
}
/// Advance to the next token, consuming it in the process.
///
/// Delegates to the underlying TokenStream. The parsing functions match the returned token
/// against the token they expect at the current position, so this presumably returns the
/// token that was current before advancing - confirm against `TokenStream::advance`.
pub fn advance(&mut self) -> Option<&Token> {
self.tokens.advance()
}
/// Parse the complete TokenStream into an Ast (abstract syntax tree).
///
/// A program is a sequence of statements. Empty statements (duplicate or redundant
/// semicolons) are skipped. Parsing ends when no token is left; an error returned while
/// parsing a statement is passed on to the caller.
pub fn parse(&mut self) -> PRes<Ast> {
    let mut statements = Vec::new();

    while let Some(token) = self.curr() {
        if matches!(token, Token::Semicolon) {
            // Empty statement -> just skip the semicolon
            self.advance();
        } else {
            statements.push(self.parse_statement()?);
        }
    }

    Ok(Ast::new(statements))
}
/// Parse a statement from the TokenStream. This consists of an expression, a let statement or
/// an assignment, followed by a terminating semicolon.
///
/// ### Grammar
/// `stmt = ( stmt_let | stmt_assign | expr ) ";"`
///
/// ### Panics
/// Panics if the statement is not terminated by a semicolon.
pub fn parse_statement(&mut self) -> PRes<Statement> {
    // Check the current and next token to decide what kind of statement is being parsed
    let stmt = match self.curr() {
        // A let token -> Parse a let statement
        Some(Token::Keyword(Keyword::Let)) => self.parse_stmt_let(),
        // Ident and "=" -> An assignment without declaration (let)
        Some(Token::Ident(_)) if matches!(self.peek(), Some(Token::Op(Op::Assign))) => {
            self.parse_stmt_assign()
        }
        // Otherwise -> A simple expression
        _ => self.parse_expr().map(Statement::Expr),
    };

    // Check that the statement is terminated with a semicolon.
    // TODO: This is not needed for block based statements like `while expr { ... }`
    if !matches!(self.advance(), Some(Token::Semicolon)) {
        panic!("Expected ';' while parsing statement");
    }

    stmt
}
/// Parse a let statement from the TokenStream: the keyword `let`, the name of the new
/// variable, an equal sign "=" and the expression that provides the value.
///
/// ### Grammar
/// `stmt_let = "let" ident "=" expr`
///
/// ### Panics
/// Panics if one of the three leading tokens is not the expected one.
pub fn parse_stmt_let(&mut self) -> PRes<Statement> {
    // The statement has to start with the let keyword
    match self.advance() {
        Some(Token::Keyword(Keyword::Let)) => {}
        _ => panic!("Unexpected token while parsing let statement. Expected 'let'"),
    }

    // Next comes the name of the variable
    let var_name = if let Some(Token::Ident(ident)) = self.advance() {
        ident.clone()
    } else {
        panic!("Unexpected token while parsing let statement. Expected ident")
    };

    // The name is followed by the equal sign
    match self.advance() {
        Some(Token::Op(Op::Assign)) => {}
        _ => panic!("Unexpected token while parsing let statement. Expected '='"),
    }

    // Everything after the equal sign is the right hand side expression
    self.parse_expr()
        .map(|rhs| Statement::LetBinding(var_name, rhs))
}
/// Parse an assignment statement from the TokenStream. This consists of an identifier, an
/// equal sign "=" and an expression.
///
/// ### Grammar
/// `stmt_assign = ident "=" expr`
///
/// ### Panics
/// Panics if the statement does not start with an identifier followed by "=".
pub fn parse_stmt_assign(&mut self) -> PRes<Statement> {
    // Fetch the variable name
    let var_name = match self.advance() {
        Some(Token::Ident(ident)) => ident.clone(),
        _ => panic!("Unexpected token while parsing assignment statement. Expected ident"),
    };

    // Check that the equal sign is present
    if !matches!(self.advance(), Some(Token::Op(Op::Assign))) {
        panic!("Unexpected token while parsing assignment statement. Expected '='");
    }

    // Parse the right hand side of the assignment
    let rhs = self.parse_expr()?;

    Ok(Statement::Assignment(var_name, rhs))
}
/// The main expression parsing function. Parses a complete expression by delegating to
/// `parse_expr_or`, which in turn descends through the remaining expression rules.
///
/// ### Grammar
/// `expr = expr_or`
pub fn parse_expr(&mut self) -> PRes<Expr> {
self.parse_expr_or()
}
/// Parse an or expression from the TokenStream. This can be an xor expression and 0 or more
/// further xor expressions separated by the or operator. The operands are combined from left
/// to right.
///
/// ### Grammar
/// `expr_or = expr_xor ("||" expr_xor)*`
pub fn parse_expr_or(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_xor()?;

    // Every further operand becomes the right side of a new node on top of the tree so far
    while matches!(self.curr(), Some(Token::Op(Op::Or))) {
        self.advance();
        let operand = self.parse_expr_xor()?;
        expr = Expr::BinOp(BinOpType::Or, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse an xor expression from the TokenStream. This can be an and expression and 0 or more
/// further and expressions separated by the xor operator. The operands are combined from left
/// to right.
///
/// ### Grammar
/// `expr_xor = expr_and ("^^" expr_and)*`
pub fn parse_expr_xor(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_and()?;

    // Every further operand becomes the right side of a new node on top of the tree so far
    while matches!(self.curr(), Some(Token::Op(Op::Xor))) {
        self.advance();
        let operand = self.parse_expr_and()?;
        expr = Expr::BinOp(BinOpType::Xor, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse an and expression from the TokenStream. This can be an equality expression and 0 or
/// more further equality expressions separated by the and operator. The operands are combined
/// from left to right.
///
/// ### Grammar
/// `expr_and = expr_equal ("&&" expr_equal)*`
pub fn parse_expr_and(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_equal()?;

    // Every further operand becomes the right side of a new node on top of the tree so far
    while matches!(self.curr(), Some(Token::Op(Op::And))) {
        self.advance();
        let operand = self.parse_expr_equal()?;
        expr = Expr::BinOp(BinOpType::And, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse an equality expression from the TokenStream. This can be a relational expression and
/// 0 or more further relational expressions separated by equality operators (equal, not
/// equal). The operands are combined from left to right.
///
/// ### Grammar
/// `expr_equal = expr_rel (("=="|"!=") expr_rel)*`
pub fn parse_expr_equal(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_rel()?;

    loop {
        // Stop at the first token that is not an equality operator
        let op_type = match self.curr() {
            Some(Token::Op(Op::Eq)) => BinOpType::Eq,
            Some(Token::Op(Op::Neq)) => BinOpType::Neq,
            _ => break,
        };
        self.advance();

        let operand = self.parse_expr_rel()?;
        expr = Expr::BinOp(op_type, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse a relational expression from the TokenStream. This can be an add expression and 0 or
/// more further add expressions separated by relational operators (greater, less, greater or
/// equal, less or equal). The operands are combined from left to right.
///
/// ### Grammar
/// `expr_rel = expr_add ((">"|"<"|">="|"<=") expr_add)*`
pub fn parse_expr_rel(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_add()?;

    loop {
        // Stop at the first token that is not a relational operator
        let op_type = match self.curr() {
            Some(Token::Op(Op::Gt)) => BinOpType::Gt,
            Some(Token::Op(Op::Lt)) => BinOpType::Lt,
            Some(Token::Op(Op::Ge)) => BinOpType::Ge,
            Some(Token::Op(Op::Le)) => BinOpType::Le,
            _ => break,
        };
        self.advance();

        let operand = self.parse_expr_add()?;
        expr = Expr::BinOp(op_type, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse an add expression from the TokenStream. This can be a multiplication expression and
/// 0 or more further multiplication expressions separated by addition precedence operators
/// (add '+', sub '-'). The operands are combined from left to right.
///
/// The operands are parsed with `parse_expr_mul`, so multiplication precedence operators bind
/// stronger than the addition precedence operators handled here.
///
/// ### Grammar
/// `expr_add = expr_mul (("+"|"-") expr_mul)*`
pub fn parse_expr_add(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_mul()?;

    loop {
        // Stop at the first token that is not an addition precedence operator
        let op_type = match self.curr() {
            Some(Token::Op(Op::Add)) => BinOpType::Add,
            Some(Token::Op(Op::Sub)) => BinOpType::Sub,
            _ => break,
        };
        self.advance();

        let operand = self.parse_expr_mul()?;
        expr = Expr::BinOp(op_type, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse a multiplication expression from the TokenStream. This can be a term and 0 or more
/// further terms separated by multiplication precedence operators (multiply '*', divide '/',
/// modulo '%'). The operands are combined from left to right.
///
/// ### Grammar
/// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*`
pub fn parse_expr_mul(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_term()?;

    loop {
        // Stop at the first token that is not a multiplication precedence operator
        let op_type = match self.curr() {
            Some(Token::Op(Op::Mul)) => BinOpType::Mul,
            Some(Token::Op(Op::Div)) => BinOpType::Div,
            Some(Token::Op(Op::Mod)) => BinOpType::Mod,
            _ => break,
        };
        self.advance();

        let operand = self.parse_expr_term()?;
        expr = Expr::BinOp(op_type, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse a term expression from the TokenStream. This can be the negation of a term, the
/// logical not of a term, an expression enclosed by parentheses or a value.
///
/// ### Grammar
/// `expr_term = "-" expr_term | "!" expr_term | "(" expr ")" | expr_value`
///
/// ### Panics
/// Panics if an opening parentheses is not matched by a closing one after the enclosed
/// expression.
pub fn parse_expr_term(&mut self) -> PRes<Expr> {
    match self.curr() {
        // '(' -> An enclosed expression that has to be followed by ')'
        Some(Token::Open(Group::Paren)) => {
            self.advance();
            let inner = self.parse_expr()?;
            match self.advance() {
                Some(Token::Close(Group::Paren)) => Ok(inner),
                _ => panic!("Missing closing parentheses while parsing term"),
            }
        }
        // '-' -> A negated term
        Some(Token::Op(Op::Sub)) => {
            self.advance();
            let operand = self.parse_expr_term()?;
            Ok(Expr::UnOp(UnOpType::Neg, Box::new(operand)))
        }
        // '!' -> The logical not of a term
        Some(Token::Op(Op::Not)) => {
            self.advance();
            let operand = self.parse_expr_term()?;
            Ok(Expr::UnOp(UnOpType::Not, Box::new(operand)))
        }
        // Anything else has to be a value
        _ => self.parse_expr_value(),
    }
}
/// Parse a value expression from the TokenStream. This can be a literal value, a function
/// call or a variable. An identifier is treated as a function call if it is directly followed
/// by an opening parentheses and as a variable otherwise.
///
/// ### Grammar
/// `expr_value = expr_literal | expr_fn_call | expr_variable`
///
/// ### Panics
/// Panics if the current token is neither a literal nor an identifier.
pub fn parse_expr_value(&mut self) -> PRes<Expr> {
    match self.curr() {
        Some(Token::Literal(_)) => self.parse_expr_literal(),
        Some(Token::Ident(_)) => {
            // The token after the identifier decides between call and variable
            if matches!(self.peek(), Some(Token::Open(Group::Paren))) {
                self.parse_expr_fn_call()
            } else {
                self.parse_expr_varibale()
            }
        }
        _ => panic!("Expected value (literal, variable or function call) while parsing value"),
    }
}
/// Parse a function call from the TokenStream. This consists of an identifier and 0 or more
/// expressions enclosed by parentheses '(', ')' and separated by commas ',' .
///
/// ### Grammar
/// `expr_fn_call = IDENT "(" expr? ( "," expr )* ")"`
///
/// ### Panics
/// Panics if the function name, the opening or the closing parentheses is missing.
pub fn parse_expr_fn_call(&mut self) -> PRes<Expr> {
    // The first 2 checks are not really necessary for internal calls since parse_expr_value
    // verifies the tokens already

    // Get the function name
    let fn_name = match self.advance() {
        Some(Token::Ident(ident)) => ident.clone(),
        _ => panic!("Unexpected token while parsing function call. Expected identifier"),
    };

    // Check that there really is an opening parentheses
    if !matches!(self.advance(), Some(Token::Open(Group::Paren))) {
        panic!("Unexpected token while parsing function call. Expected '('");
    }

    let mut args = Vec::new();

    // If there is not a closing parentheses directly after the opening "()", parse at least
    // one expression
    // TODO: This is *suboptimal* code
    if !matches!(self.curr(), Some(Token::Close(Group::Paren))) {
        args.push(self.parse_expr()?);

        // As long as there are commas after the expressions, parse more expressions as
        // parameters
        while matches!(self.curr(), Some(Token::Comma)) {
            self.advance();
            args.push(self.parse_expr()?);
        }
    }

    // Check if there really is a closing parentheses
    if !matches!(self.advance(), Some(Token::Close(Group::Paren))) {
        panic!("Unexpected token while parsing function call. Expected ')'");
    }

    // By default don't parse as an intrinsic function
    let intrinsic = false;

    Ok(Expr::FnCall(FnCall {
        intrinsic,
        fn_name,
        args,
    }))
}
/// Parse a variable name value. This consists of an identifier without parentheses afterwards.
/// The identifier represents the variable name.
///
/// ### Grammar
/// `expr_varibale = IDENT`
///
/// ### Panics
/// Panics if the consumed token is not an identifier.
pub fn parse_expr_varibale(&mut self) -> PRes<Expr> {
    if let Some(Token::Ident(name)) = self.advance() {
        Ok(Expr::Variable(name.clone()))
    } else {
        panic!("Unexpected token while parsing variable. Expected identifier")
    }
}
/// Parse a literal value. This consists of a single literal token, whose value is copied into
/// the resulting expression.
///
/// ### Grammar
/// `expr_literal = LITERAL`
///
/// ### Panics
/// Panics if the consumed token is not a literal.
pub fn parse_expr_literal(&mut self) -> PRes<Expr> {
    if let Some(Token::Literal(value)) = self.advance() {
        Ok(Expr::Literal(value.clone()))
    } else {
        panic!("Unexpected token while parsing literal. Expected literal")
    }
}
}
#[cfg(test)]
mod tests {
use crate::{
ast::{BinOpType, Expr, FnCall, UnOpType},
token::{Group, Literal, Op, Token, TokenStream},
Parser,
};
// Checks that nested groupings, unary negation, a function call and a variable are parsed
// with the expected precedence. The token list is written by hand instead of being lexed from
// a string and the expected tree is assembled bottom-up from its sub-expressions.
#[test]
fn test_groupings_neg() {
// let input = "(-(-5+2)*-(2*-sqrt(9))+-(a-6)) % 30";
let fn_name = "sqrt".to_string();
let var_name = "a".to_string();
// (-(-5+2)*-(2*-sqrt(9))+-(a-6)) % 30
let input_toks = vec![
Token::Open(Group::Paren), // (
Token::Op(Op::Sub), // -
Token::Open(Group::Paren), // (
Token::Op(Op::Sub), // -
Token::Literal(Literal::Int64(5)), // 5
Token::Op(Op::Add), // +
Token::Literal(Literal::Int64(2)), // 2
Token::Close(Group::Paren), // )
Token::Op(Op::Mul), // *
Token::Op(Op::Sub), // -
Token::Open(Group::Paren), // (
Token::Literal(Literal::Int64(2)), // 2
Token::Op(Op::Mul), // *
Token::Op(Op::Sub), // -
Token::Ident(fn_name.clone()), // sqrt
Token::Open(Group::Paren), // (
Token::Literal(Literal::Int64(9)), // 9
Token::Close(Group::Paren), // )
Token::Close(Group::Paren), // )
Token::Op(Op::Add), // +
Token::Op(Op::Sub), // -
Token::Open(Group::Paren), // (
Token::Ident(var_name.clone()), // a
Token::Op(Op::Sub), // -
Token::Literal(Literal::Int64(6)), // 6
Token::Close(Group::Paren), // )
Token::Close(Group::Paren), // )
Token::Op(Op::Mod), // %
Token::Literal(Literal::Int64(30)), // 30
];
// -(-5+2)
let neg_grp_neg_5_add_2 = Expr::UnOp(
UnOpType::Neg,
Box::new(Expr::BinOp(
BinOpType::Add,
Box::new(Expr::UnOp(
UnOpType::Neg,
Expr::Literal(Literal::Int64(5)).into(),
)),
Expr::Literal(Literal::Int64(2)).into(),
)),
);
// -(2*-sqrt(9))
let neg_grp_2_mul_neg_sqrt = Expr::UnOp(
UnOpType::Neg,
Box::new(Expr::BinOp(
BinOpType::Mul,
Expr::Literal(Literal::Int64(2)).into(),
Box::new(Expr::UnOp(
UnOpType::Neg,
Expr::FnCall(FnCall {
intrinsic: false,
fn_name,
args: vec![Expr::Literal(Literal::Int64(9))],
})
.into(),
)),
)),
);
// -(-5+2) * -(2*-sqrt(9))
let mul_first = Expr::BinOp(
BinOpType::Mul,
neg_grp_neg_5_add_2.into(),
neg_grp_2_mul_neg_sqrt.into(),
);
// -(a-6)
let neg_grp_a_sub_6 = Expr::UnOp(
UnOpType::Neg,
Box::new(Expr::BinOp(
BinOpType::Sub,
Expr::Variable(var_name).into(),
Expr::Literal(Literal::Int64(6)).into(),
)),
);
// -(-5+2)*-(2*-sqrt(9)) + -(a-6)
let left_of_mod = Expr::BinOp(BinOpType::Add, mul_first.into(), neg_grp_a_sub_6.into());
// (-(-5+2) * -(2*-sqrt(9)) + -(a-6)) % 30
let expected = Expr::BinOp(
BinOpType::Mod,
left_of_mod.into(),
Expr::Literal(Literal::Int64(30)).into(),
);
// let res = parse_str(input);
// Only the expression is parsed here, the token list does not end with a semicolon
let mut parser = Parser::new(TokenStream::new(input_toks));
let res = parser.parse_expr().unwrap();
assert_eq!(expected, res);
}
/// Common interface of the parser implementations.
pub trait Parser {
/// Parse a TokenStream into an Abstract Syntax Tree
///
/// Takes ownership of the tokens. Returns the parsed `Ast` on success and a `ParseErr`
/// otherwise.
fn parse(tokens: TokenStream) -> PRes<Ast>;
}

View File

@ -0,0 +1,575 @@
use crate::{
ast::{Ast, BinOpType, Expr, FnCall, Statement, UnOpType},
token::{Group, Keyword, Op, Token, TokenStream}, Parser,
};
use super::PRes;
/// The Parser contains a TokenStream to be parsed into an Ast (abstract syntax tree).
///
/// ## Grammar
/// ### Statements
/// `stmt_let = "let" IDENT "=" expr` \
/// `stmt_assign = IDENT "=" expr` \
/// `stmt_fn_def = "fn" IDENT "(" IDENT? ( "," IDENT)* ")" "{" stmt* "}"` \
/// `stmt_if = "if" expr "{" stmt* "}" ( "else" "{" stmt* "}" )?` \
/// `stmt_while = "while" expr "{" stmt* "}"` \
/// `stmt = ( stmt_let | stmt_assign | expr | stmt_fn_def | stmt_if | stmt_while ) ";"`
///
/// NOTE: `parse_statement` currently only handles `stmt_let`, `stmt_assign` and `expr`.
///
/// ### Expressions
/// `expr_literal = LITERAL` \
/// `expr_fn_call = IDENT "(" expr? ( "," expr )* ")"` \
/// `expr_variable = IDENT` \
/// `expr_value = expr_literal | expr_fn_call | expr_variable` \
/// `expr_term = "-" expr_term | "!" expr_term | "(" expr ")" | expr_value` \
/// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*` \
/// `expr_add = expr_mul (("+"|"-") expr_mul)*` \
/// `expr_rel = expr_add ((">"|"<"|">="|"<=") expr_add)*` \
/// `expr_equal = expr_rel (("=="|"!=") expr_rel)*` \
/// `expr_and = expr_equal ("&&" expr_equal)*` \
/// `expr_xor = expr_and ("^^" expr_and)*` \
/// `expr_or = expr_xor ("||" expr_xor)*` \
/// `expr = expr_or`
pub struct CustomParser {
tokens: TokenStream,
}
impl CustomParser {
/// Create a new parser from a TokenStream. The parser takes ownership of the stream and uses
/// it as the source of all tokens it parses.
pub fn new(tokens: TokenStream) -> Self {
Self { tokens }
}
/// Get the current token without consuming it.
///
/// Delegates to the underlying TokenStream. `None` presumably means that the end of the
/// stream has been reached (`parse` stops on `None`) - confirm against `TokenStream::curr`.
pub fn curr(&self) -> Option<&Token> {
self.tokens.curr()
}
/// Get the next token (the one after the current token) without consuming it.
///
/// Delegates to the underlying TokenStream.
pub fn peek(&self) -> Option<&Token> {
self.tokens.peek()
}
/// Advance to the next token, consuming it in the process.
///
/// Delegates to the underlying TokenStream. The parsing functions match the returned token
/// against the token they expect at the current position, so this presumably returns the
/// token that was current before advancing - confirm against `TokenStream::advance`.
pub fn advance(&mut self) -> Option<&Token> {
self.tokens.advance()
}
/// Parse the complete TokenStream into an Ast (abstract syntax tree).
///
/// A program is a sequence of statements. Empty statements (duplicate or redundant
/// semicolons) are skipped. Parsing ends when no token is left; an error returned while
/// parsing a statement is passed on to the caller.
pub fn parse(&mut self) -> PRes<Ast> {
    let mut statements = Vec::new();

    while let Some(token) = self.curr() {
        if matches!(token, Token::Semicolon) {
            // Empty statement -> just skip the semicolon
            self.advance();
        } else {
            statements.push(self.parse_statement()?);
        }
    }

    Ok(Ast::new(statements))
}
/// Parse a statement from the TokenStream. This consists of an expression, a let statement or
/// an assignment, followed by a terminating semicolon.
///
/// ### Grammar
/// `stmt = ( stmt_let | stmt_assign | expr ) ";"`
///
/// ### Panics
/// Panics if the statement is not terminated by a semicolon.
pub fn parse_statement(&mut self) -> PRes<Statement> {
    // Check the current and next token to decide what kind of statement is being parsed
    let stmt = match self.curr() {
        // A let token -> Parse a let statement
        Some(Token::Keyword(Keyword::Let)) => self.parse_stmt_let(),
        // Ident and "=" -> An assignment without declaration (let)
        Some(Token::Ident(_)) if matches!(self.peek(), Some(Token::Op(Op::Assign))) => {
            self.parse_stmt_assign()
        }
        // Otherwise -> A simple expression
        _ => self.parse_expr().map(Statement::Expr),
    };

    // Check that the statement is terminated with a semicolon.
    // TODO: This is not needed for block based statements like `while expr { ... }`
    if !matches!(self.advance(), Some(Token::Semicolon)) {
        panic!("Expected ';' while parsing statement");
    }

    stmt
}
/// Parse a let statement from the TokenStream: the keyword `let`, the name of the new
/// variable, an equal sign "=" and the expression that provides the value.
///
/// ### Grammar
/// `stmt_let = "let" ident "=" expr`
///
/// ### Panics
/// Panics if one of the three leading tokens is not the expected one.
pub fn parse_stmt_let(&mut self) -> PRes<Statement> {
    // The statement has to start with the let keyword
    match self.advance() {
        Some(Token::Keyword(Keyword::Let)) => {}
        _ => panic!("Unexpected token while parsing let statement. Expected 'let'"),
    }

    // Next comes the name of the variable
    let var_name = if let Some(Token::Ident(ident)) = self.advance() {
        ident.clone()
    } else {
        panic!("Unexpected token while parsing let statement. Expected ident")
    };

    // The name is followed by the equal sign
    match self.advance() {
        Some(Token::Op(Op::Assign)) => {}
        _ => panic!("Unexpected token while parsing let statement. Expected '='"),
    }

    // Everything after the equal sign is the right hand side expression
    self.parse_expr()
        .map(|rhs| Statement::LetBinding(var_name, rhs))
}
/// Parse an assignment statement from the TokenStream. This consists of an identifier, an
/// equal sign "=" and an expression.
///
/// ### Grammar
/// `stmt_assign = ident "=" expr`
///
/// ### Panics
/// Panics if the statement does not start with an identifier followed by "=".
pub fn parse_stmt_assign(&mut self) -> PRes<Statement> {
    // Fetch the variable name
    let var_name = match self.advance() {
        Some(Token::Ident(ident)) => ident.clone(),
        _ => panic!("Unexpected token while parsing assignment statement. Expected ident"),
    };

    // Check that the equal sign is present
    if !matches!(self.advance(), Some(Token::Op(Op::Assign))) {
        panic!("Unexpected token while parsing assignment statement. Expected '='");
    }

    // Parse the right hand side of the assignment
    let rhs = self.parse_expr()?;

    Ok(Statement::Assignment(var_name, rhs))
}
/// The main expression parsing function. Parses a complete expression by delegating to
/// `parse_expr_or`, which in turn descends through the remaining expression rules.
///
/// ### Grammar
/// `expr = expr_or`
pub fn parse_expr(&mut self) -> PRes<Expr> {
self.parse_expr_or()
}
/// Parse an or expression from the TokenStream. This can be an xor expression and 0 or more
/// further xor expressions separated by the or operator. The operands are combined from left
/// to right.
///
/// ### Grammar
/// `expr_or = expr_xor ("||" expr_xor)*`
pub fn parse_expr_or(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_xor()?;

    // Every further operand becomes the right side of a new node on top of the tree so far
    while matches!(self.curr(), Some(Token::Op(Op::Or))) {
        self.advance();
        let operand = self.parse_expr_xor()?;
        expr = Expr::BinOp(BinOpType::Or, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse an xor expression from the TokenStream. This can be an and expression and 0 or more
/// further and expressions separated by the xor operator. The operands are combined from left
/// to right.
///
/// ### Grammar
/// `expr_xor = expr_and ("^^" expr_and)*`
pub fn parse_expr_xor(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_and()?;

    // Every further operand becomes the right side of a new node on top of the tree so far
    while matches!(self.curr(), Some(Token::Op(Op::Xor))) {
        self.advance();
        let operand = self.parse_expr_and()?;
        expr = Expr::BinOp(BinOpType::Xor, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse an and expression from the TokenStream. This can be an equality expression and 0 or
/// more further equality expressions separated by the and operator. The operands are combined
/// from left to right.
///
/// ### Grammar
/// `expr_and = expr_equal ("&&" expr_equal)*`
pub fn parse_expr_and(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_equal()?;

    // Every further operand becomes the right side of a new node on top of the tree so far
    while matches!(self.curr(), Some(Token::Op(Op::And))) {
        self.advance();
        let operand = self.parse_expr_equal()?;
        expr = Expr::BinOp(BinOpType::And, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse an equality expression from the TokenStream. This can be a relational expression and
/// 0 or more further relational expressions separated by equality operators (equal, not
/// equal). The operands are combined from left to right.
///
/// ### Grammar
/// `expr_equal = expr_rel (("=="|"!=") expr_rel)*`
pub fn parse_expr_equal(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_rel()?;

    loop {
        // Stop at the first token that is not an equality operator
        let op_type = match self.curr() {
            Some(Token::Op(Op::Eq)) => BinOpType::Eq,
            Some(Token::Op(Op::Neq)) => BinOpType::Neq,
            _ => break,
        };
        self.advance();

        let operand = self.parse_expr_rel()?;
        expr = Expr::BinOp(op_type, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse a relational expression from the TokenStream. This can be an add expression and 0 or
/// more further add expressions separated by relational operators (greater, less, greater or
/// equal, less or equal). The operands are combined from left to right.
///
/// ### Grammar
/// `expr_rel = expr_add ((">"|"<"|">="|"<=") expr_add)*`
pub fn parse_expr_rel(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_add()?;

    loop {
        // Stop at the first token that is not a relational operator
        let op_type = match self.curr() {
            Some(Token::Op(Op::Gt)) => BinOpType::Gt,
            Some(Token::Op(Op::Lt)) => BinOpType::Lt,
            Some(Token::Op(Op::Ge)) => BinOpType::Ge,
            Some(Token::Op(Op::Le)) => BinOpType::Le,
            _ => break,
        };
        self.advance();

        let operand = self.parse_expr_add()?;
        expr = Expr::BinOp(op_type, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse an add expression from the TokenStream. This can be a multiplication expression and
/// 0 or more further multiplication expressions separated by addition precedence operators
/// (add '+', sub '-'). The operands are combined from left to right.
///
/// The operands are parsed with `parse_expr_mul`, so multiplication precedence operators bind
/// stronger than the addition precedence operators handled here.
///
/// ### Grammar
/// `expr_add = expr_mul (("+"|"-") expr_mul)*`
pub fn parse_expr_add(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_mul()?;

    loop {
        // Stop at the first token that is not an addition precedence operator
        let op_type = match self.curr() {
            Some(Token::Op(Op::Add)) => BinOpType::Add,
            Some(Token::Op(Op::Sub)) => BinOpType::Sub,
            _ => break,
        };
        self.advance();

        let operand = self.parse_expr_mul()?;
        expr = Expr::BinOp(op_type, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse a multiplication expression from the TokenStream. This can be a term and 0 or more
/// further terms separated by multiplication precedence operators (multiply '*', divide '/',
/// modulo '%'). The operands are combined from left to right.
///
/// ### Grammar
/// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*`
pub fn parse_expr_mul(&mut self) -> PRes<Expr> {
    let mut expr = self.parse_expr_term()?;

    loop {
        // Stop at the first token that is not a multiplication precedence operator
        let op_type = match self.curr() {
            Some(Token::Op(Op::Mul)) => BinOpType::Mul,
            Some(Token::Op(Op::Div)) => BinOpType::Div,
            Some(Token::Op(Op::Mod)) => BinOpType::Mod,
            _ => break,
        };
        self.advance();

        let operand = self.parse_expr_term()?;
        expr = Expr::BinOp(op_type, Box::new(expr), Box::new(operand));
    }

    Ok(expr)
}
/// Parse a term expression from the TokenStream. This can be the negation of a term, the
/// logical not of a term, an expression enclosed by parentheses or a value.
///
/// ### Grammar
/// `expr_term = "-" expr_term | "!" expr_term | "(" expr ")" | expr_value`
///
/// ### Panics
/// Panics if an opening parentheses is not matched by a closing one after the enclosed
/// expression.
pub fn parse_expr_term(&mut self) -> PRes<Expr> {
    match self.curr() {
        // '(' -> An enclosed expression that has to be followed by ')'
        Some(Token::Open(Group::Paren)) => {
            self.advance();
            let inner = self.parse_expr()?;
            match self.advance() {
                Some(Token::Close(Group::Paren)) => Ok(inner),
                _ => panic!("Missing closing parentheses while parsing term"),
            }
        }
        // '-' -> A negated term
        Some(Token::Op(Op::Sub)) => {
            self.advance();
            let operand = self.parse_expr_term()?;
            Ok(Expr::UnOp(UnOpType::Neg, Box::new(operand)))
        }
        // '!' -> The logical not of a term
        Some(Token::Op(Op::Not)) => {
            self.advance();
            let operand = self.parse_expr_term()?;
            Ok(Expr::UnOp(UnOpType::Not, Box::new(operand)))
        }
        // Anything else has to be a value
        _ => self.parse_expr_value(),
    }
}
/// Parse a value expression from the TokenStream. This can be a literal value, a function
/// call or a variable. An identifier is treated as a function call if it is directly followed
/// by an opening parentheses and as a variable otherwise.
///
/// ### Grammar
/// `expr_value = expr_literal | expr_fn_call | expr_variable`
///
/// ### Panics
/// Panics if the current token is neither a literal nor an identifier.
pub fn parse_expr_value(&mut self) -> PRes<Expr> {
    match self.curr() {
        Some(Token::Literal(_)) => self.parse_expr_literal(),
        Some(Token::Ident(_)) => {
            // The token after the identifier decides between call and variable
            if matches!(self.peek(), Some(Token::Open(Group::Paren))) {
                self.parse_expr_fn_call()
            } else {
                self.parse_expr_varibale()
            }
        }
        _ => panic!("Expected value (literal, variable or function call) while parsing value"),
    }
}
/// Parse a function call from the TokenStream. This consists of an identifier followed by
/// zero or more argument expressions that are separated by commas ',' and enclosed by
/// parentheses '(', ')'. Each argument is parsed with `parse_expr`.
///
/// The resulting call is never marked as intrinsic.
///
/// ### Grammar
/// `expr_fn_call = IDENT "(" ( expr ( "," expr )* )? ")"`
///
/// ### Panics
/// Panics if the current token is not an identifier, if the identifier is not followed by
/// '(' or if the argument list is not terminated by ')'.
pub fn parse_expr_fn_call(&mut self) -> PRes<Expr> {
    // The first 2 checks are not really necessary for internal calls since parse_expr_value
    // verifies the tokens already

    // Get the function name
    let fn_name = match self.advance() {
        Some(Token::Ident(ident)) => ident.clone(),
        _ => panic!("Unexpected token while parsing function call. Expected identifier"),
    };

    // Check that there really is an opening parentheses
    if !matches!(self.advance(), Some(Token::Open(Group::Paren))) {
        panic!("Unexpected token while parsing function call. Expected '('");
    }

    let mut args = Vec::new();
    // An immediately following ')' means an empty argument list "()". Otherwise there is at
    // least one argument and every comma announces one more.
    if !matches!(self.curr(), Some(Token::Close(Group::Paren))) {
        loop {
            args.push(self.parse_expr()?);
            if !matches!(self.curr(), Some(Token::Comma)) {
                break;
            }
            // Skip the ','
            self.advance();
        }
    }

    // Check that there really is a closing parentheses
    if !matches!(self.advance(), Some(Token::Close(Group::Paren))) {
        panic!("Unexpected token while parsing function call. Expected ')'");
    }

    // By default don't parse as an intrinsic function
    let intrinsic = false;

    Ok(Expr::FnCall(FnCall {
        intrinsic,
        fn_name,
        args,
    }))
}
/// Parse a variable expression. The next token has to be an identifier, which is consumed
/// and used as the name of the variable.
///
/// ### Grammar
/// `expr_varibale = IDENT`
///
/// ### Panics
/// Panics if the next token is not an identifier.
pub fn parse_expr_varibale(&mut self) -> PRes<Expr> {
    if let Some(Token::Ident(name)) = self.advance() {
        return Ok(Expr::Variable(name.clone()));
    }
    panic!("Unexpected token while parsing variable. Expected identifier")
}
/// Parse a literal expression. The next token has to be a literal token, which is consumed
/// and wrapped into a literal expression node.
///
/// ### Grammar
/// `expr_literal = LITERAL`
///
/// ### Panics
/// Panics if the next token is not a literal.
pub fn parse_expr_literal(&mut self) -> PRes<Expr> {
    let literal = match self.advance() {
        Some(Token::Literal(value)) => value.clone(),
        _ => panic!("Unexpected token while parsing literal. Expected literal"),
    };
    Ok(Expr::Literal(literal))
}
}
impl Parser for CustomParser {
    /// Build a fresh `CustomParser` over `tokens` and run its `parse` method on it.
    fn parse(tokens: TokenStream) -> PRes<Ast> {
        // The method call syntax picks the `parse` that takes the parser as receiver, not
        // this associated function
        Self::new(tokens).parse()
    }
}
#[cfg(test)]
mod tests {
    use crate::{
        ast::{BinOpType, Expr, FnCall, UnOpType},
        token::{Group, Literal, Op, Token, TokenStream},
        CustomParser,
    };

    /// Token of an integer literal
    fn int_tok(value: i64) -> Token {
        Token::Literal(Literal::Int64(value))
    }

    /// Token of an operator
    fn op_tok(op: Op) -> Token {
        Token::Op(op)
    }

    /// Token of an opening parentheses '('
    fn open_paren() -> Token {
        Token::Open(Group::Paren)
    }

    /// Token of a closing parentheses ')'
    fn close_paren() -> Token {
        Token::Close(Group::Paren)
    }

    /// Expression node of an integer literal
    fn int(value: i64) -> Expr {
        Expr::Literal(Literal::Int64(value))
    }

    /// Negation node around `inner`
    fn neg(inner: Expr) -> Expr {
        Expr::UnOp(UnOpType::Neg, Box::new(inner))
    }

    /// Binary operator node with the given operands
    fn bin(op: BinOpType, lhs: Expr, rhs: Expr) -> Expr {
        Expr::BinOp(op, Box::new(lhs), Box::new(rhs))
    }

    /// Nested groupings mixed with unary minus, a function call and a variable have to be
    /// parsed with the correct precedence.
    #[test]
    fn test_groupings_neg() {
        // Token sequence of the expression: (-(-5+2)*-(2*-sqrt(9))+-(a-6)) % 30
        let input_toks = vec![
            open_paren(),                     // (
            op_tok(Op::Sub),                  // -
            open_paren(),                     // (
            op_tok(Op::Sub),                  // -
            int_tok(5),                       // 5
            op_tok(Op::Add),                  // +
            int_tok(2),                       // 2
            close_paren(),                    // )
            op_tok(Op::Mul),                  // *
            op_tok(Op::Sub),                  // -
            open_paren(),                     // (
            int_tok(2),                       // 2
            op_tok(Op::Mul),                  // *
            op_tok(Op::Sub),                  // -
            Token::Ident("sqrt".to_string()), // sqrt
            open_paren(),                     // (
            int_tok(9),                       // 9
            close_paren(),                    // )
            close_paren(),                    // )
            op_tok(Op::Add),                  // +
            op_tok(Op::Sub),                  // -
            open_paren(),                     // (
            Token::Ident("a".to_string()),    // a
            op_tok(Op::Sub),                  // -
            int_tok(6),                       // 6
            close_paren(),                    // )
            close_paren(),                    // )
            op_tok(Op::Mod),                  // %
            int_tok(30),                      // 30
        ];

        // sqrt(9)
        let sqrt_call = Expr::FnCall(FnCall {
            intrinsic: false,
            fn_name: "sqrt".to_string(),
            args: vec![int(9)],
        });

        // -(-5+2)
        let first_factor = neg(bin(BinOpType::Add, neg(int(5)), int(2)));

        // -(2*-sqrt(9))
        let second_factor = neg(bin(BinOpType::Mul, int(2), neg(sqrt_call)));

        // -(-5+2) * -(2*-sqrt(9))
        let product = bin(BinOpType::Mul, first_factor, second_factor);

        // -(a-6)
        let negated_diff = neg(bin(
            BinOpType::Sub,
            Expr::Variable("a".to_string()),
            int(6),
        ));

        // -(-5+2)*-(2*-sqrt(9)) + -(a-6)
        let left_of_mod = bin(BinOpType::Add, product, negated_diff);

        // (-(-5+2) * -(2*-sqrt(9)) + -(a-6)) % 30
        let expected = bin(BinOpType::Mod, left_of_mod, int(30));

        let mut parser = CustomParser::new(TokenStream::new(input_toks));
        let res = parser.parse_expr().unwrap();

        assert_eq!(expected, res);
    }
}