use std::iter::Peekable; use crate::lexer::Token; /// Types for binary operators #[derive(Debug, PartialEq, Eq, Clone)] pub enum BinOpType { /// Addition Add, /// Subtraction Sub, /// Multiplication Mul, /// Divide Div, /// Modulo Mod, /// Compare Equal EquEqu, /// Compare Not Equal NotEqu, /// Less than Less, /// Less than or Equal LessEqu, /// Greater than Greater, /// Greater than or Equal GreaterEqu, /// Bitwise OR (inclusive or) BOr, /// Bitwise And BAnd, /// Bitwise Xor (exclusive or) BXor, /// Shift Left Shl, /// Shift Right Shr, } #[derive(Debug, PartialEq, Eq, Clone)] pub enum UnOpType { /// Unary Negate Negate, /// Bitwise Not BNot, } #[derive(Debug, PartialEq, Eq, Clone)] pub enum Ast { /// Integer literal (64-bit) I64(i64), /// Binary operation. Consists of type, left hand side and right hand side BinOp(BinOpType, Box, Box), /// Unary operation. Consists of type and operand UnOp(UnOpType, Box), } /* ## Grammar ### Expressions expr_primary = LITERAL | "(" expr p | "-" expr_primary | "~" expr_primary expr_mul = expr_primary (("*" | "/" | "%") expr_primary)* expr_add = expr_mul (("+" | "-") expr_mul)* expr_shift = expr_add ((">>" | "<<") expr_add)* expr_rel = expr_shift ((">" | ">=" | "<" | "<=") expr_shift)* expr_equ = expr_rel (("==" | "!=") expr_rel)* expr_band = expr_equ ("&" expr_equ)* expr_bxor = expr_band ("^" expr_band)* expr_bor = expr_bxor ("|" expr_bxor)* expr = expr_bor */ struct Parser> { tokens: Peekable, } impl> Parser { /// Create a new parser to parse the given Token Stream fn new>(tokens: A) -> Self { let tokens = tokens.into_iter().peekable(); Self { tokens } } fn parse(&mut self) -> Ast { self.parse_expr() } fn parse_expr(&mut self) -> Ast { let lhs = self.parse_primary(); self.parse_expr_precedence(lhs, 0) } /// Parse binary expressions with a precedence equal to or higher than min_prec fn parse_expr_precedence(&mut self, mut lhs: Ast, min_prec: u8) -> Ast { while let Some(binop) = &self.peek().try_to_binop() { // Stop if the next operator has a lower binding power if !(binop.precedence() >= min_prec) { break; } // The while condition already verified that this is some while peeking, so unwrap is // valid let binop = self.next().try_to_binop().unwrap(); let mut rhs = self.parse_primary(); while let Some(binop2) = &self.peek().try_to_binop() { if !(binop2.precedence() > binop.precedence()) { break; } rhs = self.parse_expr_precedence(rhs, binop.precedence() + 1); } lhs = Ast::BinOp(binop, lhs.into(), rhs.into()); } lhs } /// Parse a primary expression (for now only number) fn parse_primary(&mut self) -> Ast { match self.next() { // Literal i64 Token::I64(val) => Ast::I64(val), // Parentheses grouping Token::LParen => { let inner_expr = self.parse_expr(); // Verify that there is a closing parenthesis if !matches!(self.next(), Token::RParen) { panic!("Error parsing primary expr: Exepected closing parenthesis ')'"); } inner_expr } // Unary negation Token::Sub => { let operand = self.parse_primary(); Ast::UnOp(UnOpType::Negate, operand.into()) } Token::Tilde => { let operand = self.parse_primary(); Ast::UnOp(UnOpType::BNot, operand.into()) } tok => panic!("Error parsing primary expr: Unexpected Token '{:?}'", tok), } } /// Get the next Token without removing it fn peek(&mut self) -> &Token { self.tokens.peek().unwrap_or(&Token::EoF) } /// Advance to next Token and return the removed Token fn next(&mut self) -> Token { self.tokens.next().unwrap_or(Token::EoF) } } pub fn parse, A: IntoIterator>(tokens: A) -> Ast { let mut parser = Parser::new(tokens); parser.parse() } impl BinOpType { /// Get the precedence for a binary operator. Higher value means the OP is stronger binding. /// For example Multiplication is stronger than addition, so Mul has higher precedence than Add. /// /// The operator precedences are derived from the C language operator precedences. While not all /// C operators are included or the exact same, the precedence oder is the same. /// See: https://en.cppreference.com/w/c/language/operator_precedence fn precedence(&self) -> u8 { match self { BinOpType::BOr => 0, BinOpType::BXor => 1, BinOpType::BAnd => 2, BinOpType::EquEqu | BinOpType::NotEqu => 3, BinOpType::Less | BinOpType::LessEqu | BinOpType::Greater | BinOpType::GreaterEqu => 4, BinOpType::Shl | BinOpType::Shr => 5, BinOpType::Add | BinOpType::Sub => 6, BinOpType::Mul | BinOpType::Div | BinOpType::Mod => 7, } } } #[cfg(test)] mod tests { use super::{parse, Ast, BinOpType}; use crate::lexer::Token; #[test] fn test_parser() { // Expression: 1 + 2 * 3 + 4 // With precedence: (1 + (2 * 3)) + 4 let tokens = [ Token::I64(1), Token::Add, Token::I64(2), Token::Mul, Token::I64(3), Token::Sub, Token::I64(4), ]; let expected = Ast::BinOp( BinOpType::Sub, Ast::BinOp( BinOpType::Add, Ast::I64(1).into(), Ast::BinOp(BinOpType::Mul, Ast::I64(2).into(), Ast::I64(3).into()).into(), ) .into(), Ast::I64(4).into(), ); let actual = parse(tokens); assert_eq!(expected, actual); } }