294 lines
7.6 KiB
Rust
294 lines
7.6 KiB
Rust
use std::iter::Peekable;
|
|
|
|
use crate::lexer::Token;
|
|
|
|
/// Kinds of binary operators that can appear in an expression.
///
/// The enum is fieldless, so it is `Copy`: operator kinds can be passed around and compared by
/// value without moving or cloning them.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum BinOpType {
    /// Addition
    Add,

    /// Subtraction
    Sub,

    /// Multiplication
    Mul,

    /// Division
    Div,

    /// Modulo
    Mod,

    /// Bitwise OR (inclusive or)
    BOr,

    /// Bitwise AND
    BAnd,

    /// Bitwise XOR (exclusive or)
    BXor,

    /// Shift left
    Shl,

    /// Shift right
    Shr,

    /// Check equality
    Equ,

    /// Check inequality
    Neq,

    /// Check greater than
    Gt,

    /// Check greater or equal
    Ge,

    /// Check less than
    Lt,

    /// Check less or equal
    Le,

    /// Assign to a variable
    Assign,
}
|
|
|
|
/// Kinds of unary operators that can appear in an expression.
///
/// The enum is fieldless, so it is `Copy` and can be passed around by value.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum UnOpType {
    /// Negation
    Neg,
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
|
pub struct Ast {
|
|
pub prog: Vec<Stmt>,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
|
pub enum Stmt {
|
|
Expr(Expr),
|
|
Let(String, Expr),
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
|
pub enum Expr {
|
|
/// Integer literal (64-bit)
|
|
I64(i64),
|
|
/// Identifier (variable name)
|
|
Ident(String),
|
|
/// Binary operation. Consists of type, left hand side and right hand side
|
|
BinOp(BinOpType, Box<Expr>, Box<Expr>),
|
|
/// Unary operation. Consists of type and the value that is operated on
|
|
UnOp(UnOpType, Box<Expr>),
|
|
}
|
|
|
|
/*
|
|
## Grammar
|
|
### Expressions
|
|
expr_primary = LITERAL | IDENT | "(" expr ")" | "-" expr_primary
|
|
expr_mul = expr_primary (("*" | "/" | "%") expr_primary)*
|
|
expr_add = expr_mul (("+" | "-") expr_mul)*
|
|
expr_shift = expr_add ((">>" | "<<") expr_add)*
|
|
expr_rel = expr_shift ((">" | ">=" | "<" | "<=") expr_shift)*
|
|
expr_equ = expr_rel (("==" | "!=") expr_rel)*
|
|
expr_band = expr_equ ("&" expr_equ)*
|
|
expr_bxor = expr_band ("^" expr_band)*
|
|
expr_bor = expr_bxor ("|" expr_bxor)*
|
|
expr = expr_bor
|
|
|
|
### Statements
|
|
stmt_expr = expr
|
|
stmt_let = "let" IDENT "=" expr
|
|
stmt = (stmt_expr | stmt_let) (";")*
|
|
*/
|
|
|
|
struct Parser<T: Iterator<Item = Token>> {
|
|
tokens: Peekable<T>,
|
|
}
|
|
|
|
impl<T: Iterator<Item = Token>> Parser<T> {
|
|
/// Create a new parser to parse the given Token Stream
|
|
fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
|
|
let tokens = tokens.into_iter().peekable();
|
|
Self { tokens }
|
|
}
|
|
|
|
fn parse(&mut self) -> Ast {
|
|
let mut prog = Vec::new();
|
|
|
|
loop {
|
|
let stmt = match self.peek() {
|
|
Token::Semicolon => {
|
|
self.next();
|
|
continue;
|
|
}
|
|
Token::EoF => break,
|
|
Token::Let => self.parse_let_stmt(),
|
|
// By default try to parse an expression
|
|
_ => Stmt::Expr(self.parse_expr()),
|
|
};
|
|
|
|
prog.push(stmt);
|
|
}
|
|
|
|
Ast { prog }
|
|
}
|
|
|
|
fn parse_let_stmt(&mut self) -> Stmt {
|
|
if !matches!(self.next(), Token::Let) {
|
|
panic!("Error parsing let: Expected let token");
|
|
}
|
|
|
|
let name = match self.next() {
|
|
Token::Ident(name) => name,
|
|
_ => panic!("Error parsing let: Expected identifier after let"),
|
|
};
|
|
|
|
if !matches!(self.next(), Token::Assign) {
|
|
panic!("Error parsing let: Expected assignment token");
|
|
}
|
|
|
|
let rhs = self.parse_expr();
|
|
|
|
Stmt::Let(name, rhs)
|
|
}
|
|
|
|
fn parse_expr(&mut self) -> Expr {
|
|
let lhs = self.parse_primary();
|
|
self.parse_expr_precedence(lhs, 0)
|
|
}
|
|
|
|
/// Parse binary expressions with a precedence equal to or higher than min_prec
|
|
fn parse_expr_precedence(&mut self, mut lhs: Expr, min_prec: u8) -> Expr {
|
|
while let Some(binop) = &self.peek().try_to_binop() {
|
|
// Stop if the next operator has a lower binding power
|
|
if !(binop.precedence() >= min_prec) {
|
|
break;
|
|
}
|
|
|
|
// The while condition already verified that this is some while peeking, so unwrap is
|
|
// valid
|
|
let binop = self.next().try_to_binop().unwrap();
|
|
|
|
let mut rhs = self.parse_primary();
|
|
|
|
while let Some(binop2) = &self.peek().try_to_binop() {
|
|
if !(binop2.precedence() > binop.precedence()) {
|
|
break;
|
|
}
|
|
|
|
rhs = self.parse_expr_precedence(rhs, binop.precedence() + 1);
|
|
}
|
|
|
|
lhs = Expr::BinOp(binop, lhs.into(), rhs.into());
|
|
}
|
|
|
|
lhs
|
|
}
|
|
|
|
/// Parse a primary expression (for now only number)
|
|
fn parse_primary(&mut self) -> Expr {
|
|
match self.next() {
|
|
Token::I64(val) => Expr::I64(val),
|
|
|
|
Token::Ident(name) => Expr::Ident(name),
|
|
|
|
Token::LParen => {
|
|
// The tokens was an opening parenthesis, so parse a full expression again as the
|
|
// expression inside the parentheses `"(" expr ")"`
|
|
let inner = self.parse_expr();
|
|
|
|
// If there is no closing parenthesis after the expression, it is a syntax error
|
|
if !matches!(self.next(), Token::RParen) {
|
|
panic!("Error parsing primary expr: Missing closing parenthesis ')'");
|
|
}
|
|
|
|
inner
|
|
}
|
|
|
|
Token::Sub => Expr::UnOp(UnOpType::Neg, self.parse_primary().into()),
|
|
|
|
tok => panic!("Error parsing primary expr: Unexpected Token '{:?}'", tok),
|
|
}
|
|
}
|
|
|
|
/// Get the next Token without removing it
|
|
fn peek(&mut self) -> &Token {
|
|
self.tokens.peek().unwrap_or(&Token::EoF)
|
|
}
|
|
|
|
/// Advance to next Token and return the removed Token
|
|
fn next(&mut self) -> Token {
|
|
self.tokens.next().unwrap_or(Token::EoF)
|
|
}
|
|
}
|
|
|
|
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
|
|
let mut parser = Parser::new(tokens);
|
|
parser.parse()
|
|
}
|
|
|
|
impl BinOpType {
|
|
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
|
|
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
|
|
///
|
|
/// The operator precedences are derived from the C language operator precedences. While not all
|
|
/// C operators are included or the exact same, the precedence oder is the same.
|
|
/// See: https://en.cppreference.com/w/c/language/operator_precedence
|
|
fn precedence(&self) -> u8 {
|
|
match self {
|
|
BinOpType::Assign => 0,
|
|
BinOpType::BOr => 1,
|
|
BinOpType::BXor => 2,
|
|
BinOpType::BAnd => 3,
|
|
BinOpType::Equ | BinOpType::Neq => 4,
|
|
BinOpType::Gt | BinOpType::Ge | BinOpType::Lt | BinOpType::Le => 5,
|
|
BinOpType::Shl | BinOpType::Shr => 6,
|
|
BinOpType::Add | BinOpType::Sub => 7,
|
|
BinOpType::Mul | BinOpType::Div | BinOpType::Mod => 8,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::{parse, Ast, BinOpType, Expr, Stmt};
    use crate::lexer::Token;

    /// A single expression with mixed precedences must be grouped by operator precedence and,
    /// for equal precedence, from left to right.
    #[test]
    fn test_parser() {
        // Expression: 1 + 2 * 3 - 4
        // With precedence: (1 + (2 * 3)) - 4
        let tokens = [
            Token::I64(1),
            Token::Add,
            Token::I64(2),
            Token::Mul,
            Token::I64(3),
            Token::Sub,
            Token::I64(4),
        ];

        let expected = Expr::BinOp(
            BinOpType::Sub,
            Expr::BinOp(
                BinOpType::Add,
                Expr::I64(1).into(),
                Expr::BinOp(BinOpType::Mul, Expr::I64(2).into(), Expr::I64(3).into()).into(),
            )
            .into(),
            Expr::I64(4).into(),
        );

        // The whole token stream forms exactly one expression statement
        let expected = Ast {
            prog: vec![Stmt::Expr(expected)],
        };

        let actual = parse(tokens);
        assert_eq!(expected, actual);
    }
}
|