From 8de6e990b4815584d7d6023c1f84d80826d53988 Mon Sep 17 00:00:00 2001
From: Daniel M
Date: Thu, 23 Dec 2021 22:59:57 +0100
Subject: [PATCH] Start implementing parser

- Implemented AST + parsing for maths expressions
---
Review notes (free text between the "---" marker and the diffstat is not part
of the commit message or of the diff):

* Formatting: this patch had been flattened onto a few long lines. Line breaks
  were rebuilt from the +/- markers; the result matches the hunk headers and
  the diffstat (ast.rs 25 lines, parser.rs 225 lines, 265 insertions,
  12 deletions). Indentation is reconstructed as 4 spaces per level. The
  whitespace inside the raw string in main.rs and the position of the empty
  context lines (including the last line of the token.rs hunk) are inferred,
  so `git apply --ignore-whitespace` may be needed. TODO confirm against the
  repository.
* Generics: angle-bracket arguments had been stripped (`Result;`, `Box, Box`).
  They are restored as `Result<Expr, ParseErr>` and `Box<Expr>`, inferred from
  `Ok(a)`, `a.into()` and `Box::new(Expr::...)`. The alias may originally have
  been generic (`PRes<T>`) - TODO confirm. The author address in the From:
  header also appears to have been stripped.
* parser.rs: `parse_expr_term` and `parse_expr_literal` call `panic!` on
  malformed input although every parse function returns `PRes` and `ParseErr`
  exists; returning `Err(ParseErr)` would let callers recover.
* parser.rs: `parse` returns after the first complete expression and does not
  check for leftover tokens (for example a stray closing parenthesis).
* parser.rs: the GRAMMAR comment omits "%" although `parse_expr_mul` accepts
  `Op::Mod`, and it lists `"-" Literal` under expr_literal although negation
  is only handled in `parse_expr_term`.
* parser.rs tests: `neg_grp_2_mul_neg_2` actually builds -(2*-5).

 plang2/src/main.rs       |  14 ++-
 plang2_lib/src/ast.rs    |  25 +++++
 plang2_lib/src/lib.rs    |   6 +-
 plang2_lib/src/parser.rs | 225 +++++++++++++++++++++++++++++++++++++++
 plang2_lib/src/token.rs  |   7 +-
 5 files changed, 265 insertions(+), 12 deletions(-)
 create mode 100644 plang2_lib/src/ast.rs
 create mode 100644 plang2_lib/src/parser.rs

diff --git a/plang2/src/main.rs b/plang2/src/main.rs
index 9c4f981..bbc0a26 100644
--- a/plang2/src/main.rs
+++ b/plang2/src/main.rs
@@ -4,14 +4,7 @@ use plang2_lib::*;

 fn main() {
     let code = r#"
-        // This is the main function
-        fn main() {
-            let a = 5465;
-            let b = 8;
-            let c = a + b;
-
-            print_int(c);
-        }
+        -( -5 + 2 ) * -( 2 * -5 ) + -( 2 - 6 )
     "#;

     let mut lexer = Lexer::new(code);
@@ -20,4 +13,9 @@ fn main() {

     println!("Tokens: \n{}\n", tokens);

+    let mut parser = Parser::new(tokens);
+    let expr = parser.parse().unwrap();
+
+    println!("{:#?}", expr);
+
 }
diff --git a/plang2_lib/src/ast.rs b/plang2_lib/src/ast.rs
new file mode 100644
index 0000000..53dff35
--- /dev/null
+++ b/plang2_lib/src/ast.rs
@@ -0,0 +1,25 @@
+use crate::token::Literal;
+
+/// Binary Operator Types. For operations that have two operands
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum BinOpType {
+    Add,
+    Sub,
+
+    Mul,
+    Div,
+    Mod
+}
+
+/// Unary Operator Types. For operations that have one operand
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum UnOpType {
+    Neg
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum Expr {
+    Literal(Literal),
+    BinOp(BinOpType, Box<Expr>, Box<Expr>),
+    UnOp(UnOpType, Box<Expr>),
+}
\ No newline at end of file
diff --git a/plang2_lib/src/lib.rs b/plang2_lib/src/lib.rs
index 7269279..c974b80 100644
--- a/plang2_lib/src/lib.rs
+++ b/plang2_lib/src/lib.rs
@@ -1,5 +1,7 @@
 pub mod token;
 pub mod lexer;
+pub mod ast;
+pub mod parser;

-pub use token::*;
-pub use lexer::*;
+pub use lexer::Lexer;
+pub use parser::Parser;
diff --git a/plang2_lib/src/parser.rs b/plang2_lib/src/parser.rs
new file mode 100644
index 0000000..1000dbf
--- /dev/null
+++ b/plang2_lib/src/parser.rs
@@ -0,0 +1,225 @@
+use crate::{
+    ast::{BinOpType, Expr, UnOpType},
+    token::{Group, Op, Token, TokenStream},
+};
+
+#[derive(Debug)]
+pub struct ParseErr;
+
+type PRes = Result<Expr, ParseErr>;
+
+pub struct Parser {
+    tokens: TokenStream,
+}
+
+/*
+GRAMMAR
+
+expr_literal = "-" Literal | Literal
+expr_term = "-" expr_term | "(" expr_add ")" | expr_literal
+expr_mul = expr_term (("*"|"/") expr_term)*
+expr_add = expr_mul (("+"|"-") expr_mul)*
+*/
+
+impl Parser {
+    pub fn new(tokens: TokenStream) -> Self {
+        Self { tokens }
+    }
+
+    pub fn curr(&self) -> Option<&Token> {
+        self.tokens.curr()
+    }
+
+    pub fn peek(&self) -> Option<&Token> {
+        self.tokens.peek()
+    }
+
+    pub fn advance(&mut self) -> Option<&Token> {
+        self.tokens.advance()
+    }
+
+    pub fn parse(&mut self) -> PRes {
+        self.parse_expr_add()
+    }
+
+    pub fn parse_expr_add(&mut self) -> PRes {
+        let mut a = self.parse_expr_mul()?;
+
+        while matches!(self.curr(), Some(Token::Op(Op::Add | Op::Sub))) {
+            // We successfully matched curr against Some already in the while condition, so unwrap is fine
+            let tok_op = self.advance().unwrap().clone();
+
+            let b = self.parse_expr_mul()?;
+
+            let op_type = match tok_op {
+                Token::Op(Op::Add) => BinOpType::Add,
+                Token::Op(Op::Sub) => BinOpType::Sub,
+                _ => unreachable!(),
+            };
+
+            a = Expr::BinOp(op_type, a.into(), b.into());
+        }
+
+        Ok(a)
+    }
+
+    pub fn parse_expr_mul(&mut self) -> PRes {
+        let mut a = self.parse_expr_term()?;
+
+        while matches!(self.curr(), Some(Token::Op(Op::Mul | Op::Div | Op::Mod))) {
+            // We successfully matched curr against Some already in the while condition, so unwrap is fine
+            let tok_op = self.advance().unwrap().clone();
+
+            let b = self.parse_expr_term()?;
+
+            let op_type = match tok_op {
+                Token::Op(Op::Mul) => BinOpType::Mul,
+                Token::Op(Op::Div) => BinOpType::Div,
+                Token::Op(Op::Mod) => BinOpType::Mod,
+                _ => unreachable!(),
+            };
+
+            a = Expr::BinOp(op_type, a.into(), b.into());
+        }
+
+        Ok(a)
+    }
+
+    pub fn parse_expr_term(&mut self) -> PRes {
+        let term = match self.curr() {
+            Some(Token::Open(Group::Paren)) => {
+                self.advance();
+                let a = self.parse_expr_add()?;
+                if !matches!(self.advance(), Some(Token::Close(Group::Paren))) {
+                    panic!("Missing closing parentheses");
+                }
+                a
+            }
+            Some(Token::Op(Op::Sub)) => {
+                self.advance();
+                Expr::UnOp(UnOpType::Neg, self.parse_expr_term()?.into())
+            }
+            _ => self.parse_expr_literal()?,
+        };
+        Ok(term)
+    }
+
+    pub fn parse_expr_literal(&mut self) -> PRes {
+        match self.advance() {
+            Some(Token::Literal(lit)) => Ok(Expr::Literal(lit.clone())),
+            _ => panic!("Unexpected token. Expected literal"),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{
+        ast::{BinOpType, Expr, UnOpType},
+        token::{Group, Literal, Op, Token, TokenStream},
+        Parser,
+    };
+
+    // fn parse_str(code: &str) -> Expr {
+    //     let mut lexer = Lexer::new(code);
+    //     let tokens = lexer.tokenize().unwrap();
+    //     let mut parser = Parser::new(tokens);
+
+    //     parser.parse().unwrap()
+    // }
+
+    #[test]
+    fn test_groupings_neg() {
+        // let input = "(-(-5+2)*-(2*-5)+-(2-6)) % 30";
+
+        // (-(-5+2)*-(2*-5)+-(2-6)) % 30
+        let input_toks = vec![
+            Token::Open(Group::Paren),
+            Token::Op(Op::Sub),
+            Token::Open(Group::Paren),
+            Token::Op(Op::Sub),
+            Token::Literal(Literal::Int64(5)),
+            Token::Op(Op::Add),
+            Token::Literal(Literal::Int64(2)),
+            Token::Close(Group::Paren),
+            Token::Op(Op::Mul),
+            Token::Op(Op::Sub),
+            Token::Open(Group::Paren),
+            Token::Literal(Literal::Int64(2)),
+            Token::Op(Op::Mul),
+            Token::Op(Op::Sub),
+            Token::Literal(Literal::Int64(5)),
+            Token::Close(Group::Paren),
+            Token::Op(Op::Add),
+            Token::Op(Op::Sub),
+            Token::Open(Group::Paren),
+            Token::Literal(Literal::Int64(2)),
+            Token::Op(Op::Sub),
+            Token::Literal(Literal::Int64(6)),
+            Token::Close(Group::Paren),
+            Token::Close(Group::Paren),
+            Token::Op(Op::Mod),
+            Token::Literal(Literal::Int64(30)),
+        ];
+
+        // -(-5+2)
+        let neg_grp_neg_5_add_2 = Expr::UnOp(
+            UnOpType::Neg,
+            Box::new(Expr::BinOp(
+                BinOpType::Add,
+                Box::new(Expr::UnOp(
+                    UnOpType::Neg,
+                    Expr::Literal(Literal::Int64(5)).into(),
+                )),
+                Expr::Literal(Literal::Int64(2)).into(),
+            )),
+        );
+
+        // -(2*-5)
+        let neg_grp_2_mul_neg_2 = Expr::UnOp(
+            UnOpType::Neg,
+            Box::new(Expr::BinOp(
+                BinOpType::Mul,
+                Expr::Literal(Literal::Int64(2)).into(),
+                Box::new(Expr::UnOp(
+                    UnOpType::Neg,
+                    Expr::Literal(Literal::Int64(5)).into(),
+                )),
+            )),
+        );
+
+        // -(-5+2)*-(2*-5)
+        let mul_first = Expr::BinOp(
+            BinOpType::Mul,
+            neg_grp_neg_5_add_2.into(),
+            neg_grp_2_mul_neg_2.into(),
+        );
+
+        // -(2-6)
+        let neg_grp_2_sub_6 = Expr::UnOp(
+            UnOpType::Neg,
+            Box::new(Expr::BinOp(
+                BinOpType::Sub,
+                Expr::Literal(Literal::Int64(2)).into(),
+                Expr::Literal(Literal::Int64(6)).into(),
+            )),
+        );
+
+        // -(-5+2)*-(2*-5)+-(2-6)
+        let left_of_mod = Expr::BinOp(BinOpType::Add, mul_first.into(), neg_grp_2_sub_6.into());
+
+        // (-(-5+2)*-(2*-5)+-(2-6)) % 30
+        let expected = Expr::BinOp(
+            BinOpType::Mod,
+            left_of_mod.into(),
+            Expr::Literal(Literal::Int64(30)).into(),
+        );
+
+        // let res = parse_str(input);
+
+        let mut parser = Parser::new(TokenStream::new(input_toks));
+        let res = parser.parse_expr_add().unwrap();
+
+        assert_eq!(expected, res);
+    }
+}
diff --git a/plang2_lib/src/token.rs b/plang2_lib/src/token.rs
index 0e7a8c5..23fbac0 100644
--- a/plang2_lib/src/token.rs
+++ b/plang2_lib/src/token.rs
@@ -154,8 +154,11 @@ impl TokenStream {
     pub fn peek(&self) -> Option<&Token> {
         self.tokens.get(self.idx + 1)
     }
-    pub fn advance(&mut self) {
-        self.idx += 1
+
+    /// Advance to the next token. Sets curr to next and returns the old curr.
+    pub fn advance(&mut self) -> Option<&Token> {
+        self.idx += 1;
+        self.tokens.get(self.idx - 1)
     }
 }
