From 110bacbf6d1f5fbfbc57e97683185c0105f07177 Mon Sep 17 00:00:00 2001 From: Daniel M Date: Wed, 29 Dec 2021 15:17:01 +0100 Subject: [PATCH] Refactor parser & interpreter structure - Changed Interpreter & Parser to be traits and the original implementations to be concrete impls for the traits --- plang2_lib/src/interpreter.rs | 112 +--- .../src/interpreter/tree_walk_interpreter.rs | 115 ++++ plang2_lib/src/lib.rs | 4 +- plang2_lib/src/parser.rs | 574 +---------------- plang2_lib/src/parser/custom_parser.rs | 575 ++++++++++++++++++ 5 files changed, 709 insertions(+), 671 deletions(-) create mode 100644 plang2_lib/src/interpreter/tree_walk_interpreter.rs create mode 100644 plang2_lib/src/parser/custom_parser.rs diff --git a/plang2_lib/src/interpreter.rs b/plang2_lib/src/interpreter.rs index 11f3b65..dcf61b5 100644 --- a/plang2_lib/src/interpreter.rs +++ b/plang2_lib/src/interpreter.rs @@ -1,109 +1,13 @@ -use std::collections::HashMap; +mod tree_walk_interpreter; -use crate::{ast::{Statement, Expr, Ast, BinOpType, UnOpType}, token::Literal}; +pub use tree_walk_interpreter::TWInterpreter; -pub struct Interpreter { - prog: Vec, +use crate::ast::Ast; - // TODO: Those varibales are global only, so this will have to change with functions. Also Literal is reused as variable type - variables: HashMap, - - // Print expression statements to stdout - debug: bool, -} - -impl Interpreter { - pub fn new(prog: Ast) -> Self { - let variables = Default::default(); - Self { prog: prog.prog, variables, debug: true } - } - - pub fn run(&mut self) { - - for idx in 0..self.prog.len() { - self.execute_stmt(idx); - } - - } - - pub fn execute_stmt(&mut self, idx: usize) { - - // TODO: The clone here is not optimal - match self.prog[idx].clone() { - Statement::Expr(expr) => { - let expr_result = self.execute_expr(expr.clone()); - if self.debug { - println!("{:?}", expr_result); - } - } - Statement::LetBinding(var_name, expr) => { - let rhs = self.execute_expr(expr); - self.variables.insert(var_name, rhs); - } - Statement::Assignment(var_name, expr) => { - let rhs = self.execute_expr(expr); - *self.variables.get_mut(&var_name).expect("Assigning variable before declaration") = rhs; - }, - Statement::FnDef(_) => todo!(), - Statement::IfStmt(_) => todo!(), - Statement::WhileStmt(_) => todo!(), - } - } - - fn execute_expr(&mut self, expr: Expr) -> Literal { - match expr { - Expr::Literal(lit) => lit, - Expr::Variable(name) => self.variables.get(&name).expect("Using variable before declaration").clone(), - Expr::FnCall(_) => todo!(), - Expr::BinOp(bot, lhs, rhs) => { - - let lhs = match self.execute_expr(*lhs) { - Literal::Int64(val) => val, - _ => panic!("Binary operators for non i64 not yet implemented") - }; - let rhs = match self.execute_expr(*rhs) { - Literal::Int64(val) => val, - _ => panic!("Binary operators for non i64 not yet implemented") - }; - - let res = match bot { - BinOpType::Add => lhs + rhs, - BinOpType::Sub => lhs - rhs, - - BinOpType::Mul => lhs * rhs, - BinOpType::Div => lhs / rhs, - BinOpType::Mod => lhs % rhs, - - BinOpType::Eq => todo!(), - BinOpType::Neq => todo!(), - BinOpType::Gt => todo!(), - BinOpType::Lt => todo!(), - BinOpType::Ge => todo!(), - BinOpType::Le => todo!(), - - BinOpType::And => todo!(), - BinOpType::Or => todo!(), - BinOpType::Xor => todo!(), - }; - - Literal::Int64(res) - } - Expr::UnOp(uot, expr) => { - match uot { - UnOpType::Neg => { - let mut res = self.execute_expr(*expr); - match &mut res { - Literal::Boolean(_) => panic!("Can't negate bool"), - Literal::Int64(val) => *val *= -1, - Literal::String(_) => panic!("Can't negate string"), - }; - - res - } - UnOpType::Not => todo!(), - } - } - } - } +pub trait Interpreter { + /// Initialize the interpreter with the given program + fn new(prog: Ast) -> Self; + /// Run the program + fn run(&mut self); } diff --git a/plang2_lib/src/interpreter/tree_walk_interpreter.rs b/plang2_lib/src/interpreter/tree_walk_interpreter.rs new file mode 100644 index 0000000..f40338f --- /dev/null +++ b/plang2_lib/src/interpreter/tree_walk_interpreter.rs @@ -0,0 +1,115 @@ +use std::collections::HashMap; + +use crate::{ast::{Statement, Expr, Ast, BinOpType, UnOpType}, token::Literal}; + +use super::Interpreter; + +/// Tree-Walk-Interpreter +pub struct TWInterpreter { + prog: Vec, + + // TODO: Those varibales are global only, so this will have to change with functions. Also Literal is reused as variable type + variables: HashMap, + + // Print expression statements to stdout + debug: bool, +} + +impl Interpreter for TWInterpreter { + fn new(prog: Ast) -> Self { + let variables = Default::default(); + Self { prog: prog.prog, variables, debug: true } + } + + fn run(&mut self) { + + for idx in 0..self.prog.len() { + self.execute_stmt(idx); + } + + } +} + +impl TWInterpreter { + + pub fn execute_stmt(&mut self, idx: usize) { + + // TODO: The clone here is not optimal + match self.prog[idx].clone() { + Statement::Expr(expr) => { + let expr_result = self.execute_expr(expr.clone()); + if self.debug { + println!("{:?}", expr_result); + } + } + Statement::LetBinding(var_name, expr) => { + let rhs = self.execute_expr(expr); + self.variables.insert(var_name, rhs); + } + Statement::Assignment(var_name, expr) => { + let rhs = self.execute_expr(expr); + *self.variables.get_mut(&var_name).expect("Assigning variable before declaration") = rhs; + }, + Statement::FnDef(_) => todo!(), + Statement::IfStmt(_) => todo!(), + Statement::WhileStmt(_) => todo!(), + } + } + + fn execute_expr(&mut self, expr: Expr) -> Literal { + match expr { + Expr::Literal(lit) => lit, + Expr::Variable(name) => self.variables.get(&name).expect("Using variable before declaration").clone(), + Expr::FnCall(_) => todo!(), + Expr::BinOp(bot, lhs, rhs) => { + + let lhs = match self.execute_expr(*lhs) { + Literal::Int64(val) => val, + _ => panic!("Binary operators for non i64 not yet implemented") + }; + let rhs = match self.execute_expr(*rhs) { + Literal::Int64(val) => val, + _ => panic!("Binary operators for non i64 not yet implemented") + }; + + let res = match bot { + BinOpType::Add => lhs + rhs, + BinOpType::Sub => lhs - rhs, + + BinOpType::Mul => lhs * rhs, + BinOpType::Div => lhs / rhs, + BinOpType::Mod => lhs % rhs, + + BinOpType::Eq => todo!(), + BinOpType::Neq => todo!(), + BinOpType::Gt => todo!(), + BinOpType::Lt => todo!(), + BinOpType::Ge => todo!(), + BinOpType::Le => todo!(), + + BinOpType::And => todo!(), + BinOpType::Or => todo!(), + BinOpType::Xor => todo!(), + }; + + Literal::Int64(res) + } + Expr::UnOp(uot, expr) => { + match uot { + UnOpType::Neg => { + let mut res = self.execute_expr(*expr); + match &mut res { + Literal::Boolean(_) => panic!("Can't negate bool"), + Literal::Int64(val) => *val *= -1, + Literal::String(_) => panic!("Can't negate string"), + }; + + res + } + UnOpType::Not => todo!(), + } + } + } + } + +} diff --git a/plang2_lib/src/lib.rs b/plang2_lib/src/lib.rs index fc3a712..1e978eb 100644 --- a/plang2_lib/src/lib.rs +++ b/plang2_lib/src/lib.rs @@ -5,5 +5,5 @@ pub mod parser; pub mod interpreter; pub use lexer::Lexer; -pub use parser::Parser; -pub use interpreter::Interpreter; +pub use parser::{Parser, CustomParser}; +pub use interpreter::TWInterpreter; diff --git a/plang2_lib/src/parser.rs b/plang2_lib/src/parser.rs index 87854a0..ac3bae8 100644 --- a/plang2_lib/src/parser.rs +++ b/plang2_lib/src/parser.rs @@ -1,572 +1,16 @@ -use crate::{ - ast::{Ast, BinOpType, Expr, FnCall, Statement, UnOpType}, - token::{Group, Keyword, Op, Token, TokenStream}, -}; +use crate::{ast::Ast, token::TokenStream}; + +mod custom_parser; + +pub use custom_parser::CustomParser; /// TODO: Real parsing errors instead of panics in the Parser #[derive(Debug)] pub struct ParseErr; -type PRes = Result; +pub(crate) type PRes = Result; -/// The Parser contains a TokenStream to be parsed into an Ast (abstract syntax tree). -/// -/// ## Grammar -/// ### Statements -/// `stmt_let = "let" IDENT "=" expr` \ -/// `stmt_assign = IDENT "=" expr` \ -/// `stmt_fn_def = "fn" IDENT "(" IDENT? ( "," IDENT)* ")" "{" stmt* "}"` \ -/// `stmt_if = "if" expr "{" stmt* "}" ( "else" "{" stmt* "}" )?` \ -/// `stmt_while = "while" expr "{" stmt* "}"` -/// `stmt = ( stmt_let | stmt_assign | expr | stmt_fn_def | stmt_if | stmt_while | ) ";"` \ -/// -/// ### Expressions -/// `expr_literal = LITERAL` \ -/// `expr_fn_call = IDENT "(" expr? ( "," expr )* ")"` \ -/// `expr_varibale = IDENT` \ -/// `expr_value = expr_literal | expr_fn_call | expr_variable` \ -/// `expr_term = "-" expr_term | "!" expr_term | "(" expr ")" | expr_value` \ -/// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*` \ -/// `expr_add = expr_mul (("+"|"-") expr_mul)*` \ -/// `expr_rel = expr_add ((">"|"<"|">="|"<=") expr_add)*` -/// `expr_equal = expr_rel (("=="|"!=") expr_rel)*` -/// `expr_and = expr_equal ("&&" expr_equal)*` -/// `expr_xor = expr_and ("^^" expr_and)*` -/// `expr_or = expr_xor ("||" expr_xor)*` -/// `expr = expr_or` -pub struct Parser { - tokens: TokenStream, -} - -impl Parser { - /// Create a new parser from a TokenStream - pub fn new(tokens: TokenStream) -> Self { - Self { tokens } - } - - /// Get the current token without consuming it - pub fn curr(&self) -> Option<&Token> { - self.tokens.curr() - } - - /// Get the next token without consuming it - pub fn peek(&self) -> Option<&Token> { - self.tokens.peek() - } - - /// Advance to the next token, consuming it in the process - pub fn advance(&mut self) -> Option<&Token> { - self.tokens.advance() - } - - /// Parse a whole TokenStream into an Ast (abstract syntax tree). A program consists of a - /// sequence of statements. - pub fn parse(&mut self) -> PRes { - let mut prog = Vec::new(); - - while let Some(tok) = self.curr() { - match tok { - // Skip empty statements like duplicate or redundant semicolons - Token::Semicolon => { - self.advance(); - continue; - } - _ => prog.push(self.parse_statement()?), - } - } - - Ok(Ast::new(prog)) - } - - /// Parse a statement from the TokenStream. This consists of an expression, a let statement or - /// an assignment. - /// - /// ### Grammar - /// `stmt = ( stmt_let | stmt_assign | expr ) ";"` - pub fn parse_statement(&mut self) -> PRes { - // Check the current and next char to decide what kind of statement is being parsed - let stmt = match self.curr() { - // A let token -> Parse a let statement - Some(Token::Keyword(Keyword::Let)) => self.parse_stmt_let(), - // Ident and "=" -> An assignment without declaration (let) - Some(Token::Ident(_)) if matches!(self.peek(), Some(Token::Op(Op::Assign))) => { - self.parse_stmt_assign() - } - // Otherwise -> A simple expression - _ => self.parse_expr().map(|expr| Statement::Expr(expr)), - }; - - // Check that the statement is terminated with a semicolon. - // TODO: This is not needed for block based statements like `while expr { ... }` - if !matches!(self.advance(), Some(Token::Semicolon)) { - panic!("Expected ';' while parsing statement"); - } - - stmt - } - - /// Parse a let statement from the TokenStream. This consists of a let token, an identifier, - /// an equal sign "=" and an expression. - /// - /// ### Grammar - /// `stmt_let = "let" ident "=" expr` - pub fn parse_stmt_let(&mut self) -> PRes { - // Check if the let token is there - if !matches!(self.advance(), Some(Token::Keyword(Keyword::Let))) { - panic!("Unexpected token while parsing let statement. Expected 'let'"); - } - - // Fetch the variable name - let var_name = match self.advance() { - Some(Token::Ident(ident)) => ident.clone(), - _ => panic!("Unexpected token while parsing let statement. Expected ident"), - }; - - // Check if the equal sign is present - if !matches!(self.advance(), Some(Token::Op(Op::Assign))) { - panic!("Unexpected token while parsing let statement. Expected '='"); - } - - // Parse the right hand side of the let statement - let rhs = self.parse_expr()?; - - let let_binding = Statement::LetBinding(var_name, rhs); - - Ok(let_binding) - } - - /// Parse an assignment statement from the TokenStream. This consists of a an identifier, an - /// equal sign "=" and an expression. - /// - /// ### Grammar - /// `stmt_assign = ident "=" expr` - pub fn parse_stmt_assign(&mut self) -> PRes { - // Fetch the variable name - let var_name = match self.advance() { - Some(Token::Ident(ident)) => ident.clone(), - _ => panic!("Unexpected token while parsing assignment statement. Expected ident"), - }; - - // Check that the equal sign is present - if !matches!(self.advance(), Some(Token::Op(Op::Assign))) { - panic!("Unexpected token while parsing let assignment. Expected '='"); - } - - // Parse the right hand side of the assignment - let rhs = self.parse_expr()?; - - let let_binding = Statement::Assignment(var_name, rhs); - - Ok(let_binding) - } - - /// The main expression parsing function. - pub fn parse_expr(&mut self) -> PRes { - self.parse_expr_or() - } - - pub fn parse_expr_or(&mut self) -> PRes { - let mut lhs = self.parse_expr_xor()?; - - while let Some(Token::Op(optok)) = self.curr() { - let op_type = match optok { - Op::Or => BinOpType::Or, - _ => break - }; - self.advance(); - - let rhs = self.parse_expr_xor()?; - - lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); - } - - Ok(lhs) - } - - pub fn parse_expr_xor(&mut self) -> PRes { - // Parse the left hand side / the main expression if there is nothing on the right - let mut lhs = self.parse_expr_and()?; - - // Parse 0 or more expressions to the right side of the xor operators - while let Some(Token::Op(optok)) = self.curr() { - let op_type = match optok { - Op::Xor => BinOpType::Xor, - _ => break - }; - self.advance(); - - let rhs = self.parse_expr_and()?; - - lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); - } - - Ok(lhs) - } - - pub fn parse_expr_and(&mut self) -> PRes { - // Parse the left hand side / the main expression if there is nothing on the right - let mut lhs = self.parse_expr_equal()?; - - // Parse 0 or more expressions to the right side of the and operators - while let Some(Token::Op(optok)) = self.curr() { - let op_type = match optok { - Op::And => BinOpType::And, - _ => break - }; - self.advance(); - - let rhs = self.parse_expr_equal()?; - - lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); - } - - Ok(lhs) - } - - pub fn parse_expr_equal(&mut self) -> PRes { - let mut lhs = self.parse_expr_rel()?; - - while let Some(Token::Op(optok)) = self.curr() { - let op_type = match optok { - Op::Eq => BinOpType::Eq, - Op::Neq => BinOpType::Neq, - _ => break - }; - self.advance(); - - let rhs = self.parse_expr_rel()?; - - lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); - } - - Ok(lhs) - } - - pub fn parse_expr_rel(&mut self) -> PRes { - // Parse the left hand side / the main expression if there is nothing on the right - let mut lhs = self.parse_expr_add()?; - - // Parse 0 or more expressions to the right side of the relational operators - while let Some(Token::Op(optok)) = self.curr() { - let op_type = match optok { - Op::Gt => BinOpType::Gt, - Op::Lt => BinOpType::Lt, - Op::Ge => BinOpType::Ge, - Op::Le => BinOpType::Le, - _ => break - }; - self.advance(); - - let rhs = self.parse_expr_add()?; - - lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); - } - - Ok(lhs) - } - - /// Parse an add expression from the TokenStream. This can be a multiplication expression and - /// 0 or more further multiplication expressions separated by addition precedence operators - /// (add '+', sub '-'). - /// - /// Add is the operator with the lowest precedence which is why this recursively handles all - /// other kinds of expressions. - /// - /// ### Grammar - /// `expr_add = expr_mul (("+"|"-") expr_mul)*` - pub fn parse_expr_add(&mut self) -> PRes { - // Parse the left hand side / the main expression if there is nothing on the right - let mut lhs = self.parse_expr_mul()?; - - // Parse 0 or more expressions to the right side of the add operators - while let Some(Token::Op(optok)) = self.curr() { - let op_type = match optok { - Op::Add => BinOpType::Add, - Op::Sub => BinOpType::Sub, - _ => break - }; - self.advance(); - - let rhs = self.parse_expr_mul()?; - - lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); - } - - Ok(lhs) - } - - /// Parse a multiplication expression from the TokenSteam. This can be a term and 0 or more - /// further terms separated by multiplication precedence operators (multiply '*', divide '/', - /// modulo '%') - /// - /// ### Grammar - /// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*` - pub fn parse_expr_mul(&mut self) -> PRes { - // Parse the left hand side / the main expression if there is nothing on the right - let mut lhs = self.parse_expr_term()?; - - while let Some(Token::Op(optok)) = self.curr() { - let op_type = match optok { - Op::Mul => BinOpType::Mul, - Op::Div => BinOpType::Div, - Op::Mod => BinOpType::Mod, - _ => break - }; - self.advance(); - - let rhs = self.parse_expr_term()?; - - lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); - } - - Ok(lhs) - } - - /// Parse a term expression from the TokenSteam. This can be the negation of a term, an add - /// expression enclosed by parentheses or a value. - /// - /// ### Grammar - /// `"-" expr_term | "!" expr_term | "(" expr_add ")" | expr_value` - pub fn parse_expr_term(&mut self) -> PRes { - let term = match self.curr() { - // Current token is an opening parentheses '(' -> Must be an enclosed expr_add - Some(Token::Open(Group::Paren)) => { - // Skip the '(' - self.advance(); - - let expr = self.parse_expr()?; - - // After the expression must be closing parentheses ')' - if !matches!(self.advance(), Some(Token::Close(Group::Paren))) { - panic!("Missing closing parentheses while parsing term"); - } - - expr - } - // Current token is a minus '-' -> Must be a negated expr_term - Some(Token::Op(Op::Sub)) => { - // Skip the '-' - self.advance(); - - // Parse an expr_term in a Negation Node - Expr::UnOp(UnOpType::Neg, self.parse_expr_term()?.into()) - } - // Current token is a not '!' -> Must be a not expr_term - Some(Token::Op(Op::Not)) => { - // Skip the '!' - self.advance(); - - // Parse an expr_term in a Not Node - Expr::UnOp(UnOpType::Not, self.parse_expr_term()?.into()) - } - // Nothing special in the current -> Must be an expr_value - _ => self.parse_expr_value()?, - }; - Ok(term) - } - - /// Parse a value expression from the TokenSteam. This can be a literal value, a function call - /// or a variable. - /// - /// ### Grammar - /// `expr_value = expr_literal | expr_fn_call | expr_variable` - pub fn parse_expr_value(&mut self) -> PRes { - match self.curr() { - Some(Token::Literal(_)) => self.parse_expr_literal(), - Some(Token::Ident(_)) if matches!(self.peek(), Some(Token::Open(Group::Paren))) => { - self.parse_expr_fn_call() - } - Some(Token::Ident(_)) => self.parse_expr_varibale(), - _ => panic!("Expected value (literal, variable or function call) while parsing value"), - } - } - - /// Parse a function call from the TokenStream. This consists of an identifier and 0 or more - /// add expressions enclosed by parentheses '(', ')' and separated by commas ',' . - /// - /// ### Grammar - /// `expr_fn_call = IDENT "(" expr_add? ( "," expr_add )* ")"` - pub fn parse_expr_fn_call(&mut self) -> PRes { - // The first 2 checks are not really necessary for internal calls since parse_expr_value - // verifies the tokens already - - // Get the function name - let fn_name = match self.advance() { - Some(Token::Ident(ident)) => ident.clone(), - _ => panic!("Unexpected token while parsing function call. Expected identifier"), - }; - - // Check that there really is an opening parentheses - if !matches!(self.advance(), Some(Token::Open(Group::Paren))) { - panic!("Unexpected token while parsing function call. Expected '('"); - } - - let mut args = Vec::new(); - - // If there is not a closing parentheses directly after the opening "()", parse at least - // one add expression - // TODO: This is *suboptimal* code - if !matches!(self.curr(), Some(Token::Close(Group::Paren))) { - args.push(self.parse_expr()?); - - // As long as there are commas after the expressions, parse more expressions as - // parameters - while matches!(self.curr(), Some(Token::Comma)) { - self.advance(); - args.push(self.parse_expr()?); - } - } - - // Check if there really is a closing parentheses - if !matches!(self.advance(), Some(Token::Close(Group::Paren))) { - panic!("Unexpected token while parsing function call. Expected '('"); - } - - // By default don't parse as an intrinsic function - let intrinsic = false; - - Ok(Expr::FnCall(FnCall { - intrinsic, - fn_name, - args, - })) - } - - /// Parse a variable name value. This consists of an identifier without parentheses afterwards. - /// The identifier represents the variable name. - /// - /// ### Grammar - /// `expr_varibale = IDENT` - pub fn parse_expr_varibale(&mut self) -> PRes { - match self.advance() { - Some(Token::Ident(ident)) => Ok(Expr::Variable(ident.clone())), - _ => panic!("Unexpected token while parsing variable. Expected identifier"), - } - } - - /// Parse a literal value. This consists of a literal token. - /// - /// ### Grammar - /// `expr_literal = LITERAL` - pub fn parse_expr_literal(&mut self) -> PRes { - match self.advance() { - Some(Token::Literal(lit)) => Ok(Expr::Literal(lit.clone())), - _ => panic!("Unexpected token while parsing literal. Expected literal"), - } - } -} - -#[cfg(test)] -mod tests { - use crate::{ - ast::{BinOpType, Expr, FnCall, UnOpType}, - token::{Group, Literal, Op, Token, TokenStream}, - Parser, - }; - - #[test] - fn test_groupings_neg() { - // let input = "(-(-5+2)*-(2*-sqrt(9))+-(a-6)) % 30"; - - let fn_name = "sqrt".to_string(); - let var_name = "a".to_string(); - - // (-(-5+2)*-(2*-sqrt(9))+-(a-6)) % 30 - let input_toks = vec![ - Token::Open(Group::Paren), // ( - Token::Op(Op::Sub), // - - Token::Open(Group::Paren), // ( - Token::Op(Op::Sub), // - - Token::Literal(Literal::Int64(5)), // 5 - Token::Op(Op::Add), // + - Token::Literal(Literal::Int64(2)), // 2 - Token::Close(Group::Paren), // ) - Token::Op(Op::Mul), // * - Token::Op(Op::Sub), // - - Token::Open(Group::Paren), // ( - Token::Literal(Literal::Int64(2)), // 2 - Token::Op(Op::Mul), // * - Token::Op(Op::Sub), // - - Token::Ident(fn_name.clone()), // sqrt - Token::Open(Group::Paren), // ( - Token::Literal(Literal::Int64(9)), // 9 - Token::Close(Group::Paren), // ) - Token::Close(Group::Paren), // ) - Token::Op(Op::Add), // + - Token::Op(Op::Sub), // - - Token::Open(Group::Paren), // ( - Token::Ident(var_name.clone()), // a - Token::Op(Op::Sub), // - - Token::Literal(Literal::Int64(6)), // 6 - Token::Close(Group::Paren), // ) - Token::Close(Group::Paren), // ) - Token::Op(Op::Mod), // % - Token::Literal(Literal::Int64(30)), // 30 - ]; - - // -(-5+2) - let neg_grp_neg_5_add_2 = Expr::UnOp( - UnOpType::Neg, - Box::new(Expr::BinOp( - BinOpType::Add, - Box::new(Expr::UnOp( - UnOpType::Neg, - Expr::Literal(Literal::Int64(5)).into(), - )), - Expr::Literal(Literal::Int64(2)).into(), - )), - ); - - // -(2*-sqrt(9)) - let neg_grp_2_mul_neg_sqrt = Expr::UnOp( - UnOpType::Neg, - Box::new(Expr::BinOp( - BinOpType::Mul, - Expr::Literal(Literal::Int64(2)).into(), - Box::new(Expr::UnOp( - UnOpType::Neg, - Expr::FnCall(FnCall { - intrinsic: false, - fn_name, - args: vec![Expr::Literal(Literal::Int64(9))], - }) - .into(), - )), - )), - ); - - // -(-5+2) * -(2*-sqrt(9)) - let mul_first = Expr::BinOp( - BinOpType::Mul, - neg_grp_neg_5_add_2.into(), - neg_grp_2_mul_neg_sqrt.into(), - ); - - // -(a-6) - let neg_grp_a_sub_6 = Expr::UnOp( - UnOpType::Neg, - Box::new(Expr::BinOp( - BinOpType::Sub, - Expr::Variable(var_name).into(), - Expr::Literal(Literal::Int64(6)).into(), - )), - ); - - // -(-5+2)*-(2*-sqrt(9)) + -(a-6) - let left_of_mod = Expr::BinOp(BinOpType::Add, mul_first.into(), neg_grp_a_sub_6.into()); - - // (-(-5+2) * -(2*-sqrt(9)) + -(a-6)) % 30 - let expected = Expr::BinOp( - BinOpType::Mod, - left_of_mod.into(), - Expr::Literal(Literal::Int64(30)).into(), - ); - - // let res = parse_str(input); - - let mut parser = Parser::new(TokenStream::new(input_toks)); - let res = parser.parse_expr().unwrap(); - - assert_eq!(expected, res); - } +pub trait Parser { + /// Parse a TokenStream into an Abstract Syntax Tree + fn parse(tokens: TokenStream) -> PRes; } diff --git a/plang2_lib/src/parser/custom_parser.rs b/plang2_lib/src/parser/custom_parser.rs new file mode 100644 index 0000000..d3f68e0 --- /dev/null +++ b/plang2_lib/src/parser/custom_parser.rs @@ -0,0 +1,575 @@ +use crate::{ + ast::{Ast, BinOpType, Expr, FnCall, Statement, UnOpType}, + token::{Group, Keyword, Op, Token, TokenStream}, Parser, +}; + +use super::PRes; + +/// The Parser contains a TokenStream to be parsed into an Ast (abstract syntax tree). +/// +/// ## Grammar +/// ### Statements +/// `stmt_let = "let" IDENT "=" expr` \ +/// `stmt_assign = IDENT "=" expr` \ +/// `stmt_fn_def = "fn" IDENT "(" IDENT? ( "," IDENT)* ")" "{" stmt* "}"` \ +/// `stmt_if = "if" expr "{" stmt* "}" ( "else" "{" stmt* "}" )?` \ +/// `stmt_while = "while" expr "{" stmt* "}"` +/// `stmt = ( stmt_let | stmt_assign | expr | stmt_fn_def | stmt_if | stmt_while | ) ";"` \ +/// +/// ### Expressions +/// `expr_literal = LITERAL` \ +/// `expr_fn_call = IDENT "(" expr? ( "," expr )* ")"` \ +/// `expr_varibale = IDENT` \ +/// `expr_value = expr_literal | expr_fn_call | expr_variable` \ +/// `expr_term = "-" expr_term | "!" expr_term | "(" expr ")" | expr_value` \ +/// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*` \ +/// `expr_add = expr_mul (("+"|"-") expr_mul)*` \ +/// `expr_rel = expr_add ((">"|"<"|">="|"<=") expr_add)*` +/// `expr_equal = expr_rel (("=="|"!=") expr_rel)*` +/// `expr_and = expr_equal ("&&" expr_equal)*` +/// `expr_xor = expr_and ("^^" expr_and)*` +/// `expr_or = expr_xor ("||" expr_xor)*` +/// `expr = expr_or` +pub struct CustomParser { + tokens: TokenStream, +} + +impl CustomParser { + /// Create a new parser from a TokenStream + pub fn new(tokens: TokenStream) -> Self { + Self { tokens } + } + + /// Get the current token without consuming it + pub fn curr(&self) -> Option<&Token> { + self.tokens.curr() + } + + /// Get the next token without consuming it + pub fn peek(&self) -> Option<&Token> { + self.tokens.peek() + } + + /// Advance to the next token, consuming it in the process + pub fn advance(&mut self) -> Option<&Token> { + self.tokens.advance() + } + + /// Parse a whole TokenStream into an Ast (abstract syntax tree). A program consists of a + /// sequence of statements. + pub fn parse(&mut self) -> PRes { + let mut prog = Vec::new(); + + while let Some(tok) = self.curr() { + match tok { + // Skip empty statements like duplicate or redundant semicolons + Token::Semicolon => { + self.advance(); + continue; + } + _ => prog.push(self.parse_statement()?), + } + } + + Ok(Ast::new(prog)) + } + + /// Parse a statement from the TokenStream. This consists of an expression, a let statement or + /// an assignment. + /// + /// ### Grammar + /// `stmt = ( stmt_let | stmt_assign | expr ) ";"` + pub fn parse_statement(&mut self) -> PRes { + // Check the current and next char to decide what kind of statement is being parsed + let stmt = match self.curr() { + // A let token -> Parse a let statement + Some(Token::Keyword(Keyword::Let)) => self.parse_stmt_let(), + // Ident and "=" -> An assignment without declaration (let) + Some(Token::Ident(_)) if matches!(self.peek(), Some(Token::Op(Op::Assign))) => { + self.parse_stmt_assign() + } + // Otherwise -> A simple expression + _ => self.parse_expr().map(|expr| Statement::Expr(expr)), + }; + + // Check that the statement is terminated with a semicolon. + // TODO: This is not needed for block based statements like `while expr { ... }` + if !matches!(self.advance(), Some(Token::Semicolon)) { + panic!("Expected ';' while parsing statement"); + } + + stmt + } + + /// Parse a let statement from the TokenStream. This consists of a let token, an identifier, + /// an equal sign "=" and an expression. + /// + /// ### Grammar + /// `stmt_let = "let" ident "=" expr` + pub fn parse_stmt_let(&mut self) -> PRes { + // Check if the let token is there + if !matches!(self.advance(), Some(Token::Keyword(Keyword::Let))) { + panic!("Unexpected token while parsing let statement. Expected 'let'"); + } + + // Fetch the variable name + let var_name = match self.advance() { + Some(Token::Ident(ident)) => ident.clone(), + _ => panic!("Unexpected token while parsing let statement. Expected ident"), + }; + + // Check if the equal sign is present + if !matches!(self.advance(), Some(Token::Op(Op::Assign))) { + panic!("Unexpected token while parsing let statement. Expected '='"); + } + + // Parse the right hand side of the let statement + let rhs = self.parse_expr()?; + + let let_binding = Statement::LetBinding(var_name, rhs); + + Ok(let_binding) + } + + /// Parse an assignment statement from the TokenStream. This consists of a an identifier, an + /// equal sign "=" and an expression. + /// + /// ### Grammar + /// `stmt_assign = ident "=" expr` + pub fn parse_stmt_assign(&mut self) -> PRes { + // Fetch the variable name + let var_name = match self.advance() { + Some(Token::Ident(ident)) => ident.clone(), + _ => panic!("Unexpected token while parsing assignment statement. Expected ident"), + }; + + // Check that the equal sign is present + if !matches!(self.advance(), Some(Token::Op(Op::Assign))) { + panic!("Unexpected token while parsing let assignment. Expected '='"); + } + + // Parse the right hand side of the assignment + let rhs = self.parse_expr()?; + + let let_binding = Statement::Assignment(var_name, rhs); + + Ok(let_binding) + } + + /// The main expression parsing function. + pub fn parse_expr(&mut self) -> PRes { + self.parse_expr_or() + } + + pub fn parse_expr_or(&mut self) -> PRes { + let mut lhs = self.parse_expr_xor()?; + + while let Some(Token::Op(optok)) = self.curr() { + let op_type = match optok { + Op::Or => BinOpType::Or, + _ => break + }; + self.advance(); + + let rhs = self.parse_expr_xor()?; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + pub fn parse_expr_xor(&mut self) -> PRes { + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_and()?; + + // Parse 0 or more expressions to the right side of the xor operators + while let Some(Token::Op(optok)) = self.curr() { + let op_type = match optok { + Op::Xor => BinOpType::Xor, + _ => break + }; + self.advance(); + + let rhs = self.parse_expr_and()?; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + pub fn parse_expr_and(&mut self) -> PRes { + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_equal()?; + + // Parse 0 or more expressions to the right side of the and operators + while let Some(Token::Op(optok)) = self.curr() { + let op_type = match optok { + Op::And => BinOpType::And, + _ => break + }; + self.advance(); + + let rhs = self.parse_expr_equal()?; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + pub fn parse_expr_equal(&mut self) -> PRes { + let mut lhs = self.parse_expr_rel()?; + + while let Some(Token::Op(optok)) = self.curr() { + let op_type = match optok { + Op::Eq => BinOpType::Eq, + Op::Neq => BinOpType::Neq, + _ => break + }; + self.advance(); + + let rhs = self.parse_expr_rel()?; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + pub fn parse_expr_rel(&mut self) -> PRes { + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_add()?; + + // Parse 0 or more expressions to the right side of the relational operators + while let Some(Token::Op(optok)) = self.curr() { + let op_type = match optok { + Op::Gt => BinOpType::Gt, + Op::Lt => BinOpType::Lt, + Op::Ge => BinOpType::Ge, + Op::Le => BinOpType::Le, + _ => break + }; + self.advance(); + + let rhs = self.parse_expr_add()?; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + /// Parse an add expression from the TokenStream. This can be a multiplication expression and + /// 0 or more further multiplication expressions separated by addition precedence operators + /// (add '+', sub '-'). + /// + /// Add is the operator with the lowest precedence which is why this recursively handles all + /// other kinds of expressions. + /// + /// ### Grammar + /// `expr_add = expr_mul (("+"|"-") expr_mul)*` + pub fn parse_expr_add(&mut self) -> PRes { + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_mul()?; + + // Parse 0 or more expressions to the right side of the add operators + while let Some(Token::Op(optok)) = self.curr() { + let op_type = match optok { + Op::Add => BinOpType::Add, + Op::Sub => BinOpType::Sub, + _ => break + }; + self.advance(); + + let rhs = self.parse_expr_mul()?; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + /// Parse a multiplication expression from the TokenSteam. This can be a term and 0 or more + /// further terms separated by multiplication precedence operators (multiply '*', divide '/', + /// modulo '%') + /// + /// ### Grammar + /// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*` + pub fn parse_expr_mul(&mut self) -> PRes { + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_term()?; + + while let Some(Token::Op(optok)) = self.curr() { + let op_type = match optok { + Op::Mul => BinOpType::Mul, + Op::Div => BinOpType::Div, + Op::Mod => BinOpType::Mod, + _ => break + }; + self.advance(); + + let rhs = self.parse_expr_term()?; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + /// Parse a term expression from the TokenSteam. This can be the negation of a term, an add + /// expression enclosed by parentheses or a value. + /// + /// ### Grammar + /// `"-" expr_term | "!" expr_term | "(" expr_add ")" | expr_value` + pub fn parse_expr_term(&mut self) -> PRes { + let term = match self.curr() { + // Current token is an opening parentheses '(' -> Must be an enclosed expr_add + Some(Token::Open(Group::Paren)) => { + // Skip the '(' + self.advance(); + + let expr = self.parse_expr()?; + + // After the expression must be closing parentheses ')' + if !matches!(self.advance(), Some(Token::Close(Group::Paren))) { + panic!("Missing closing parentheses while parsing term"); + } + + expr + } + // Current token is a minus '-' -> Must be a negated expr_term + Some(Token::Op(Op::Sub)) => { + // Skip the '-' + self.advance(); + + // Parse an expr_term in a Negation Node + Expr::UnOp(UnOpType::Neg, self.parse_expr_term()?.into()) + } + // Current token is a not '!' -> Must be a not expr_term + Some(Token::Op(Op::Not)) => { + // Skip the '!' + self.advance(); + + // Parse an expr_term in a Not Node + Expr::UnOp(UnOpType::Not, self.parse_expr_term()?.into()) + } + // Nothing special in the current -> Must be an expr_value + _ => self.parse_expr_value()?, + }; + Ok(term) + } + + /// Parse a value expression from the TokenSteam. This can be a literal value, a function call + /// or a variable. + /// + /// ### Grammar + /// `expr_value = expr_literal | expr_fn_call | expr_variable` + pub fn parse_expr_value(&mut self) -> PRes { + match self.curr() { + Some(Token::Literal(_)) => self.parse_expr_literal(), + Some(Token::Ident(_)) if matches!(self.peek(), Some(Token::Open(Group::Paren))) => { + self.parse_expr_fn_call() + } + Some(Token::Ident(_)) => self.parse_expr_varibale(), + _ => panic!("Expected value (literal, variable or function call) while parsing value"), + } + } + + /// Parse a function call from the TokenStream. This consists of an identifier and 0 or more + /// add expressions enclosed by parentheses '(', ')' and separated by commas ',' . + /// + /// ### Grammar + /// `expr_fn_call = IDENT "(" expr_add? ( "," expr_add )* ")"` + pub fn parse_expr_fn_call(&mut self) -> PRes { + // The first 2 checks are not really necessary for internal calls since parse_expr_value + // verifies the tokens already + + // Get the function name + let fn_name = match self.advance() { + Some(Token::Ident(ident)) => ident.clone(), + _ => panic!("Unexpected token while parsing function call. Expected identifier"), + }; + + // Check that there really is an opening parentheses + if !matches!(self.advance(), Some(Token::Open(Group::Paren))) { + panic!("Unexpected token while parsing function call. Expected '('"); + } + + let mut args = Vec::new(); + + // If there is not a closing parentheses directly after the opening "()", parse at least + // one add expression + // TODO: This is *suboptimal* code + if !matches!(self.curr(), Some(Token::Close(Group::Paren))) { + args.push(self.parse_expr()?); + + // As long as there are commas after the expressions, parse more expressions as + // parameters + while matches!(self.curr(), Some(Token::Comma)) { + self.advance(); + args.push(self.parse_expr()?); + } + } + + // Check if there really is a closing parentheses + if !matches!(self.advance(), Some(Token::Close(Group::Paren))) { + panic!("Unexpected token while parsing function call. Expected '('"); + } + + // By default don't parse as an intrinsic function + let intrinsic = false; + + Ok(Expr::FnCall(FnCall { + intrinsic, + fn_name, + args, + })) + } + + /// Parse a variable name value. This consists of an identifier without parentheses afterwards. + /// The identifier represents the variable name. + /// + /// ### Grammar + /// `expr_varibale = IDENT` + pub fn parse_expr_varibale(&mut self) -> PRes { + match self.advance() { + Some(Token::Ident(ident)) => Ok(Expr::Variable(ident.clone())), + _ => panic!("Unexpected token while parsing variable. Expected identifier"), + } + } + + /// Parse a literal value. This consists of a literal token. + /// + /// ### Grammar + /// `expr_literal = LITERAL` + pub fn parse_expr_literal(&mut self) -> PRes { + match self.advance() { + Some(Token::Literal(lit)) => Ok(Expr::Literal(lit.clone())), + _ => panic!("Unexpected token while parsing literal. Expected literal"), + } + } +} + +impl Parser for CustomParser { + fn parse(tokens: TokenStream) -> PRes { + let mut parser = Self::new(tokens); + parser.parse() + } +} + +#[cfg(test)] +mod tests { + use crate::{ + ast::{BinOpType, Expr, FnCall, UnOpType}, + token::{Group, Literal, Op, Token, TokenStream}, + CustomParser, + }; + + #[test] + fn test_groupings_neg() { + // let input = "(-(-5+2)*-(2*-sqrt(9))+-(a-6)) % 30"; + + let fn_name = "sqrt".to_string(); + let var_name = "a".to_string(); + + // (-(-5+2)*-(2*-sqrt(9))+-(a-6)) % 30 + let input_toks = vec![ + Token::Open(Group::Paren), // ( + Token::Op(Op::Sub), // - + Token::Open(Group::Paren), // ( + Token::Op(Op::Sub), // - + Token::Literal(Literal::Int64(5)), // 5 + Token::Op(Op::Add), // + + Token::Literal(Literal::Int64(2)), // 2 + Token::Close(Group::Paren), // ) + Token::Op(Op::Mul), // * + Token::Op(Op::Sub), // - + Token::Open(Group::Paren), // ( + Token::Literal(Literal::Int64(2)), // 2 + Token::Op(Op::Mul), // * + Token::Op(Op::Sub), // - + Token::Ident(fn_name.clone()), // sqrt + Token::Open(Group::Paren), // ( + Token::Literal(Literal::Int64(9)), // 9 + Token::Close(Group::Paren), // ) + Token::Close(Group::Paren), // ) + Token::Op(Op::Add), // + + Token::Op(Op::Sub), // - + Token::Open(Group::Paren), // ( + Token::Ident(var_name.clone()), // a + Token::Op(Op::Sub), // - + Token::Literal(Literal::Int64(6)), // 6 + Token::Close(Group::Paren), // ) + Token::Close(Group::Paren), // ) + Token::Op(Op::Mod), // % + Token::Literal(Literal::Int64(30)), // 30 + ]; + + // -(-5+2) + let neg_grp_neg_5_add_2 = Expr::UnOp( + UnOpType::Neg, + Box::new(Expr::BinOp( + BinOpType::Add, + Box::new(Expr::UnOp( + UnOpType::Neg, + Expr::Literal(Literal::Int64(5)).into(), + )), + Expr::Literal(Literal::Int64(2)).into(), + )), + ); + + // -(2*-sqrt(9)) + let neg_grp_2_mul_neg_sqrt = Expr::UnOp( + UnOpType::Neg, + Box::new(Expr::BinOp( + BinOpType::Mul, + Expr::Literal(Literal::Int64(2)).into(), + Box::new(Expr::UnOp( + UnOpType::Neg, + Expr::FnCall(FnCall { + intrinsic: false, + fn_name, + args: vec![Expr::Literal(Literal::Int64(9))], + }) + .into(), + )), + )), + ); + + // -(-5+2) * -(2*-sqrt(9)) + let mul_first = Expr::BinOp( + BinOpType::Mul, + neg_grp_neg_5_add_2.into(), + neg_grp_2_mul_neg_sqrt.into(), + ); + + // -(a-6) + let neg_grp_a_sub_6 = Expr::UnOp( + UnOpType::Neg, + Box::new(Expr::BinOp( + BinOpType::Sub, + Expr::Variable(var_name).into(), + Expr::Literal(Literal::Int64(6)).into(), + )), + ); + + // -(-5+2)*-(2*-sqrt(9)) + -(a-6) + let left_of_mod = Expr::BinOp(BinOpType::Add, mul_first.into(), neg_grp_a_sub_6.into()); + + // (-(-5+2) * -(2*-sqrt(9)) + -(a-6)) % 30 + let expected = Expr::BinOp( + BinOpType::Mod, + left_of_mod.into(), + Expr::Literal(Literal::Int64(30)).into(), + ); + + // let res = parse_str(input); + + let mut parser = CustomParser::new(TokenStream::new(input_toks)); + let res = parser.parse_expr().unwrap(); + + assert_eq!(expected, res); + } +}