From 1712dac6d66497443d501cd7587d3a08099938da Mon Sep 17 00:00:00 2001 From: Daniel M Date: Tue, 28 Dec 2021 23:59:10 +0100 Subject: [PATCH] Impl. more expr types in parser - Impl. unary bool not "!" - Impl. relational exprs (">", ">=", "<", "<=") - Impl. equality exprs ("==", "!=") - Impl. bool and, or, xor ("&&", "||", "^^") - Changed bool xor lexing from "^" to "^^" in order to keep "^" reserved for bitwise xor - Prepared Ast for if, while & fn_def statements --- plang2_lib/src/ast.rs | 57 ++++++++++ plang2_lib/src/interpreter.rs | 31 ++++-- plang2_lib/src/lexer.rs | 7 +- plang2_lib/src/parser.rs | 199 +++++++++++++++++++++++++++------- plang2_lib/src/token.rs | 8 +- 5 files changed, 251 insertions(+), 51 deletions(-) diff --git a/plang2_lib/src/ast.rs b/plang2_lib/src/ast.rs index 56a86dc..1811d21 100644 --- a/plang2_lib/src/ast.rs +++ b/plang2_lib/src/ast.rs @@ -3,18 +3,51 @@ use crate::token::Literal; /// Binary Operator Types. For operations that have two operands #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum BinOpType { + /// Addition "+" Add, + /// Subtraction "-" Sub, + /// Multiplication "*" Mul, + /// Division "/" Div, + /// Modulo "%" Mod, + + // Equality + /// Equal "==" + Eq, + /// Not equal "!=" + Neq, + + /// Greater than ">" + Gt, + /// Lesser than "<" + Lt, + /// Greater or equal ">=" + Ge, + /// Lesser or equal "<=" + Le, + + // Boolean + /// Boolean And "&&" + And, + /// Boolean Or "||" + Or, + /// Boolean Not "!" + Not, + /// Boolean Xor "^^" + Xor, } /// Unary Operator Types. For operations that have one operand #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum UnOpType { + /// Negation "-" Neg, + /// Boolean Not "!" 
+ Not, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -24,6 +57,27 @@ pub struct FnCall { pub args: Vec, } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FnDef { + pub fn_name: String, + /// Argument names + pub args: Vec, + pub body: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct IfStmt { + pub condition: Expr, + pub body_true: Vec, + pub body_false: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct WhileStmt { + pub condition: Expr, + pub body: Vec, +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Expr { Literal(Literal), @@ -39,6 +93,9 @@ pub enum Statement { Expr(Expr), LetBinding(String, Expr), Assignment(String, Expr), + FnDef(FnDef), + IfStmt(IfStmt), + WhileStmt(WhileStmt), } #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/plang2_lib/src/interpreter.rs b/plang2_lib/src/interpreter.rs index 6580674..d386a13 100644 --- a/plang2_lib/src/interpreter.rs +++ b/plang2_lib/src/interpreter.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use crate::{ast::{Statement, Expr, Ast}, token::Literal}; +use crate::{ast::{Statement, Expr, Ast, BinOpType, UnOpType}, token::Literal}; pub struct Interpreter { prog: Vec, @@ -44,6 +44,9 @@ impl Interpreter { let rhs = self.execute_expr(expr); *self.variables.get_mut(&var_name).expect("Assigning variable before declaration") = rhs; }, + Statement::FnDef(_) => todo!(), + Statement::IfStmt(_) => todo!(), + Statement::WhileStmt(_) => todo!(), } } @@ -64,18 +67,31 @@ impl Interpreter { }; let res = match bot { - crate::ast::BinOpType::Add => lhs + rhs, - crate::ast::BinOpType::Sub => lhs - rhs, - crate::ast::BinOpType::Mul => lhs * rhs, - crate::ast::BinOpType::Div => lhs / rhs, - crate::ast::BinOpType::Mod => lhs % rhs, + BinOpType::Add => lhs + rhs, + BinOpType::Sub => lhs - rhs, + + BinOpType::Mul => lhs * rhs, + BinOpType::Div => lhs / rhs, + BinOpType::Mod => lhs % rhs, + + BinOpType::Eq => todo!(), + BinOpType::Neq => todo!(), + BinOpType::Gt => todo!(), 
+ BinOpType::Lt => todo!(), + BinOpType::Ge => todo!(), + BinOpType::Le => todo!(), + + BinOpType::And => todo!(), + BinOpType::Or => todo!(), + BinOpType::Not => todo!(), + BinOpType::Xor => todo!(), }; Literal::Int64(res) } Expr::UnOp(uot, expr) => { match uot { - crate::ast::UnOpType::Neg => { + UnOpType::Neg => { let mut res = self.execute_expr(*expr); match &mut res { Literal::Boolean(_) => panic!("Can't negate bool"), @@ -85,6 +101,7 @@ impl Interpreter { res } + UnOpType::Not => todo!(), } } } diff --git a/plang2_lib/src/lexer.rs b/plang2_lib/src/lexer.rs index 04ff857..6a4844d 100644 --- a/plang2_lib/src/lexer.rs +++ b/plang2_lib/src/lexer.rs @@ -97,6 +97,10 @@ impl<'a> Lexer<'a> { self.advance(); tokens.push(Token::Op(Op::Or)); } + '^' if matches!(ch_nxt, Some('^')) => { + self.advance(); + tokens.push(Token::Op(Op::Xor)); + } // Lex tokens with 1 char length. This just matches the current char '+' => tokens.push(Token::Op(Op::Add)), @@ -118,7 +122,6 @@ impl<'a> Lexer<'a> { ',' => tokens.push(Token::Comma), '.' => tokens.push(Token::Dot), '!' => tokens.push(Token::Op(Op::Not)), - '^' => tokens.push(Token::Op(Op::Xor)), '#' => tokens.push(Token::Hashtag), // A quote represents a string start, so lex a string token here @@ -301,7 +304,7 @@ mod test { * / % == != > < >= <= = -> - && || ^ ! + && || ^^ ! 
([{)]} 4564 "a string" false true an_5ident6 diff --git a/plang2_lib/src/parser.rs b/plang2_lib/src/parser.rs index 3e2abfd..d66f4be 100644 --- a/plang2_lib/src/parser.rs +++ b/plang2_lib/src/parser.rs @@ -3,6 +3,7 @@ use crate::{ token::{Group, Keyword, Op, Token, TokenStream}, }; +/// TODO: Real parsing errors instead of panics in the Parser #[derive(Debug)] pub struct ParseErr; @@ -12,41 +13,31 @@ type PRes = Result; /// /// ## Grammar /// ### Statements -/// `stmt_let = "let" ident "=" expr_add` \ -/// `stmt_assign = ident "=" expr_add` \ -/// `stmt = ( stmt_let | stmt_assign | expr_add ) ";"` \ +/// `stmt_let = "let" IDENT "=" expr` \ +/// `stmt_assign = IDENT "=" expr` \ +/// `stmt_fn_def = "fn" IDENT "(" IDENT? ( "," IDENT)* ")" "{" stmt* "}"` \ +/// `stmt_if = "if" expr "{" stmt* "}" ( "else" "{" stmt* "}" )?` \ +/// `stmt_while = "while" expr "{" stmt* "}"` \ +/// `stmt = ( stmt_let | stmt_assign | expr | stmt_fn_def | stmt_if | stmt_while ) ";"` \ /// /// ### Expressions /// `expr_literal = LITERAL` \ -/// `expr_fn_call = IDENT "(" expr_add? ( "," expr_add )* ")"` \ +/// `expr_fn_call = IDENT "(" expr? ( "," expr )* ")"` \ /// `expr_varibale = IDENT` \ /// `expr_value = expr_literal | expr_fn_call | expr_variable` \ -/// `expr_term = "-" expr_term | "(" expr_add ")" | expr_value` \ +/// `expr_term = "-" expr_term | "!" expr_term | "(" expr ")" | expr_value` \ /// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*` \ /// `expr_add = expr_mul (("+"|"-") expr_mul)*` \ +/// `expr_rel = expr_add ((">"|"<"|">="|"<=") expr_add)*` \ +/// `expr_equal = expr_rel (("=="|"!=") expr_rel)*` \ +/// `expr_and = expr_equal ("&&" expr_equal)*` \ +/// `expr_xor = expr_and ("^^" expr_and)*` \ +/// `expr_or = expr_xor ("||" expr_xor)*` \ +/// `expr = expr_or` pub struct Parser { tokens: TokenStream, } -/* -# GRAMMAR -## expressions - -expr_literal = LITERAL -expr_fn_call = IDENT "(" expr_add? 
( "," expr_add )* ")" -expr_varibale = IDENT -expr_value = expr_literal | expr_fn_call | expr_variable -expr_term = "-" expr_term | "(" expr_add ")" | expr_value -expr_mul = expr_term (("*"|"/"|"%") expr_term)* -expr_add = expr_mul (("+"|"-") expr_mul)* - -## statements -stmt_let = "let" ident "=" expr_add -stmt_assign = ident "=" expr_add -stmt = ( stmt_let | stmt_assign | expr_add ) ";" -*/ - - impl Parser { /// Create a new parser from a TokenStream pub fn new(tokens: TokenStream) -> Self { @@ -91,7 +82,7 @@ impl Parser { /// an assignment. /// /// ### Grammar - /// `stmt = ( stmt_let | stmt_assign | expr_add ) ";"` + /// `stmt = ( stmt_let | stmt_assign | expr ) ";"` pub fn parse_statement(&mut self) -> PRes { // Check the current and next char to decide what kind of statement is being parsed let stmt = match self.curr() { @@ -102,7 +93,7 @@ impl Parser { self.parse_stmt_assign() } // Otherwise -> A simple expression - _ => self.parse_expr_add().map(|expr| Statement::Expr(expr)), + _ => self.parse_expr().map(|expr| Statement::Expr(expr)), }; // Check that the statement is terminated with a semicolon. @@ -118,7 +109,7 @@ impl Parser { /// an equal sign "=" and an expression. /// /// ### Grammar - /// `stmt_let = "let" ident "=" expr_add` + /// `stmt_let = "let" ident "=" expr` pub fn parse_stmt_let(&mut self) -> PRes { // Check if the let token is there if !matches!(self.advance(), Some(Token::Keyword(Keyword::Let))) { @@ -137,7 +128,7 @@ impl Parser { } // Parse the right hand side of the let statement - let rhs = self.parse_expr_add()?; + let rhs = self.parse_expr()?; let let_binding = Statement::LetBinding(var_name, rhs); @@ -148,7 +139,7 @@ impl Parser { /// equal sign "=" and an expression. 
/// /// ### Grammar - /// `stmt_assign = ident "=" expr_add` + /// `stmt_assign = ident "=" expr` pub fn parse_stmt_assign(&mut self) -> PRes { // Fetch the variable name let var_name = match self.advance() { @@ -162,16 +153,140 @@ impl Parser { } // Parse the right hand side of the assignment - let rhs = self.parse_expr_add()?; + let rhs = self.parse_expr()?; let let_binding = Statement::Assignment(var_name, rhs); Ok(let_binding) } - /// The main expression parsing function. This can be a multiplication expression and 0 or more - /// further multiplication expressions separated by addition precedence operators (add '+', - /// sub '-'). + /// The main expression parsing function. + pub fn parse_expr(&mut self) -> PRes { + self.parse_expr_or() + } + + pub fn parse_expr_or(&mut self) -> PRes { + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_xor()?; + + // Parse 0 or more expressions to the right side of the or operators + while matches!(self.curr(), Some(Token::Op(Op::Or))) { + // We successfully matched curr against Some already in the while condition, so unwrap + // is fine + let tok_op = self.advance().unwrap().clone(); + + let rhs = self.parse_expr_xor()?; + + let op_type = match tok_op { + Token::Op(Op::Or) => BinOpType::Or, + _ => unreachable!(), + }; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + pub fn parse_expr_xor(&mut self) -> PRes { + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_and()?; + + // Parse 0 or more expressions to the right side of the xor operators + while matches!(self.curr(), Some(Token::Op(Op::Xor))) { + // We successfully matched curr against Some already in the while condition, so unwrap + // is fine + let tok_op = self.advance().unwrap().clone(); + + let rhs = self.parse_expr_and()?; + + let op_type = match tok_op { + Token::Op(Op::Xor) => BinOpType::Xor, + _ => 
unreachable!(), + }; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + pub fn parse_expr_and(&mut self) -> PRes { + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_equal()?; + + // Parse 0 or more expressions to the right side of the and operators + while matches!(self.curr(), Some(Token::Op(Op::And))) { + // We successfully matched curr against Some already in the while condition, so unwrap + // is fine + let tok_op = self.advance().unwrap().clone(); + + let rhs = self.parse_expr_equal()?; + + let op_type = match tok_op { + Token::Op(Op::And) => BinOpType::And, + _ => unreachable!(), + }; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + pub fn parse_expr_equal(&mut self) -> PRes { + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_rel()?; + + // Parse 0 or more expressions to the right side of the equality operators + while matches!(self.curr(), Some(Token::Op(Op::Eq | Op::Neq))) { + // We successfully matched curr against Some already in the while condition, so unwrap + // is fine + let tok_op = self.advance().unwrap().clone(); + + let rhs = self.parse_expr_rel()?; + + let op_type = match tok_op { + Token::Op(Op::Eq) => BinOpType::Eq, + Token::Op(Op::Neq) => BinOpType::Neq, + _ => unreachable!(), + }; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + pub fn parse_expr_rel(&mut self) -> PRes { + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_add()?; + + // Parse 0 or more expressions to the right side of the relational operators + while matches!(self.curr(), Some(Token::Op(Op::Gt | Op::Lt | Op::Ge | Op::Le))) { + // We successfully matched curr against Some already in the while condition, so unwrap + // is fine + let tok_op = self.advance().unwrap().clone(); + + let rhs = 
self.parse_expr_add()?; + + let op_type = match tok_op { + Token::Op(Op::Gt) => BinOpType::Gt, + Token::Op(Op::Lt) => BinOpType::Lt, + Token::Op(Op::Ge) => BinOpType::Ge, + Token::Op(Op::Le) => BinOpType::Le, + _ => unreachable!(), + }; + + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); + } + + Ok(lhs) + } + + /// Parse an add expression from the TokenStream. This can be a multiplication expression and + /// 0 or more further multiplication expressions separated by addition precedence operators + /// (add '+', sub '-'). /// /// Add is the operator with the lowest precedence which is why this recursively handles all /// other kinds of expressions. @@ -236,7 +351,7 @@ impl Parser { /// expression enclosed by parentheses or a value. /// /// ### Grammar - /// `"-" expr_term | "(" expr_add ")" | expr_value` + /// `"-" expr_term | "!" expr_term | "(" expr ")" | expr_value` pub fn parse_expr_term(&mut self) -> PRes { let term = match self.curr() { // Current token is an opening parentheses '(' -> Must be an enclosed expr_add @@ -244,7 +359,7 @@ impl Parser { // Skip the '(' self.advance(); - let expr = self.parse_expr_add()?; + let expr = self.parse_expr()?; // After the expression must be closing parentheses ')' if !matches!(self.advance(), Some(Token::Close(Group::Paren))) { @@ -261,6 +376,14 @@ impl Parser { // Parse an expr_term in a Negation Node Expr::UnOp(UnOpType::Neg, self.parse_expr_term()?.into()) } + // Current token is a not '!' -> Must be a not expr_term + Some(Token::Op(Op::Not)) => { + // Skip the '!' 
+ self.advance(); + + // Parse an expr_term in a Not Node + Expr::UnOp(UnOpType::Not, self.parse_expr_term()?.into()) + } // Nothing special in the current -> Must be an expr_value _ => self.parse_expr_value()?, }; @@ -309,13 +432,13 @@ impl Parser { // one add expression // TODO: This is *suboptimal* code if !matches!(self.curr(), Some(Token::Close(Group::Paren))) { - args.push(self.parse_expr_add()?); + args.push(self.parse_expr()?); // As long as there are commas after the expressions, parse more expressions as // parameters while matches!(self.curr(), Some(Token::Comma)) { self.advance(); - args.push(self.parse_expr_add()?); + args.push(self.parse_expr()?); } } @@ -467,7 +590,7 @@ mod tests { // let res = parse_str(input); let mut parser = Parser::new(TokenStream::new(input_toks)); - let res = parser.parse_expr_add().unwrap(); + let res = parser.parse_expr().unwrap(); assert_eq!(expected, res); } diff --git a/plang2_lib/src/token.rs b/plang2_lib/src/token.rs index 681bea8..566303c 100644 --- a/plang2_lib/src/token.rs +++ b/plang2_lib/src/token.rs @@ -39,13 +39,13 @@ pub enum Op { // Boolean - /// And "&&" + /// Boolean And "&&" And, - /// Or "||" + /// Boolean Or "||" Or, - /// Not "!" + /// Boolean Not "!" Not, - /// Xor "^" + /// Boolean Xor "^^" Xor, /// Arrow "->"