nek-lang/src/parser.rs

294 lines
7.6 KiB
Rust

use std::iter::Peekable;
use crate::lexer::Token;
/// Kinds of binary (two-operand) operators that can appear in an expression
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BinOpType {
    /// Arithmetic addition (`+`)
    Add,

    /// Arithmetic subtraction (`-`)
    Sub,

    /// Arithmetic multiplication (`*`)
    Mul,

    /// Arithmetic division (`/`)
    Div,

    /// Remainder of a division (`%`)
    Mod,

    /// Bitwise inclusive OR (`|`)
    BOr,

    /// Bitwise AND (`&`)
    BAnd,

    /// Bitwise exclusive OR (`^`)
    BXor,

    /// Bit shift to the left (`<<`)
    Shl,

    /// Bit shift to the right (`>>`)
    Shr,

    /// Comparison: both sides are equal (`==`)
    Equ,

    /// Comparison: both sides are not equal (`!=`)
    Neq,

    /// Comparison: left side is greater than the right side (`>`)
    Gt,

    /// Comparison: left side is greater than or equal to the right side (`>=`)
    Ge,

    /// Comparison: left side is less than the right side (`<`)
    Lt,

    /// Comparison: left side is less than or equal to the right side (`<=`)
    Le,

    /// Assignment of a value to a variable
    Assign,
}
/// Kinds of unary (single-operand) operators that can appear in an expression
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum UnOpType {
    /// Arithmetic negation (prefix `-`)
    Neg,
}
/// Abstract syntax tree of a complete program, as produced by the parser
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Ast {
    /// The statements of the program, in the order in which they were parsed
    pub prog: Vec<Stmt>,
}
/// A single statement of a program
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Stmt {
    /// An expression that is used as a statement
    Expr(Expr),

    /// Variable binding (`let IDENT = expr`). Consists of the variable name and the expression on
    /// the right hand side
    Let(String, Expr),
}
/// A node of an expression tree
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Expr {
    /// Literal 64-bit signed integer value
    I64(i64),

    /// Reference to a variable by its name
    Ident(String),

    /// Binary operation: the operator type, followed by the left hand side operand and the right
    /// hand side operand
    BinOp(BinOpType, Box<Expr>, Box<Expr>),

    /// Unary operation: the operator type, followed by the operand it is applied to
    UnOp(UnOpType, Box<Expr>),
}
/*
## Grammar
### Expressions
expr_primary = LITERAL | IDENT | "(" expr ")" | "-" expr_primary
expr_mul = expr_primary (("*" | "/" | "%") expr_primary)*
expr_add = expr_mul (("+" | "-") expr_mul)*
expr_shift = expr_add ((">>" | "<<") expr_add)*
expr_rel = expr_shift ((">" | ">=" | "<" | "<=") expr_shift)*
expr_equ = expr_rel (("==" | "!=") expr_rel)*
expr_band = expr_equ ("&" expr_equ)*
expr_bxor = expr_band ("^" expr_band)*
expr_bor = expr_bxor ("|" expr_bxor)*
expr = expr_bor
### Statements
stmt_expr = expr
stmt_let = "let" IDENT "=" expr
stmt = (stmt_expr | stmt_let) (";")*
*/
/// Parser state: a stream of tokens that can be inspected one token ahead
struct Parser<T>
where
    T: Iterator<Item = Token>,
{
    /// The tokens that have not been consumed yet. Wrapped in a `Peekable` so that the upcoming
    /// token can be looked at without removing it
    tokens: Peekable<T>,
}
impl<T> Parser<T>
where
    T: Iterator<Item = Token>,
{
    /// Build a parser that reads its input from the given token stream
    fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
        Self {
            tokens: tokens.into_iter().peekable(),
        }
    }

    /// Parse all remaining tokens into an [`Ast`].
    ///
    /// Statements are collected until the end of the token stream is reached. Semicolons between
    /// statements are skipped and do not produce a statement themselves.
    ///
    /// # Panics
    /// Panics when the tokens do not form a valid statement (see the other parse functions).
    fn parse(&mut self) -> Ast {
        let mut prog = Vec::new();

        loop {
            match self.peek() {
                // Nothing left to parse
                Token::EoF => return Ast { prog },
                // Statement separator, just consume it
                Token::Semicolon => {
                    self.next();
                }
                Token::Let => prog.push(self.parse_let_stmt()),
                // Everything else is treated as the start of an expression
                _ => prog.push(Stmt::Expr(self.parse_expr())),
            }
        }
    }

    /// Parse a let statement of the form `"let" IDENT "=" expr`.
    ///
    /// # Panics
    /// Panics when the upcoming tokens are not a let token, an identifier and an assignment token
    /// (in that order), or when the expression on the right hand side is invalid.
    fn parse_let_stmt(&mut self) -> Stmt {
        match self.next() {
            Token::Let => {}
            _ => panic!("Error parsing let: Expected let token"),
        }

        let name = if let Token::Ident(ident) = self.next() {
            ident
        } else {
            panic!("Error parsing let: Expected identifier after let")
        };

        match self.next() {
            Token::Assign => {}
            _ => panic!("Error parsing let: Expected assignment token"),
        }

        let value = self.parse_expr();
        Stmt::Let(name, value)
    }

    /// Parse a full expression, consisting of a primary expression followed by any number of
    /// binary operations.
    ///
    /// # Panics
    /// Panics when the upcoming tokens do not form a valid expression.
    fn parse_expr(&mut self) -> Expr {
        let first = self.parse_primary();
        self.parse_expr_precedence(first, 0)
    }

    /// Extend `lhs` with all following binary operations whose operator has a precedence equal
    /// to or higher than `min_prec`.
    ///
    /// Operators of the same precedence are grouped to the left. A following operator with a
    /// higher precedence is parsed first (recursively) and becomes part of the right hand side.
    ///
    /// # Panics
    /// Panics when an operand of a binary operation is not a valid primary expression.
    fn parse_expr_precedence(&mut self, mut lhs: Expr, min_prec: u8) -> Expr {
        loop {
            // Only continue if the upcoming token is a binary operator that binds at least as
            // strong as required. Otherwise the expression built so far is complete.
            let op = match self.peek().try_to_binop() {
                Some(op) if op.precedence() >= min_prec => op,
                _ => return lhs,
            };
            // The operator was only peeked at so far, so it still has to be consumed
            self.next();

            let op_prec = op.precedence();
            let mut rhs = self.parse_primary();

            // As long as a stronger binding operator follows, it claims the right hand side
            // first
            while matches!(
                self.peek().try_to_binop(),
                Some(following) if following.precedence() > op_prec
            ) {
                rhs = self.parse_expr_precedence(rhs, op_prec + 1);
            }

            lhs = Expr::BinOp(op, Box::new(lhs), Box::new(rhs));
        }
    }

    /// Parse a primary expression: an integer literal, an identifier, an expression in
    /// parentheses or a negated primary expression.
    ///
    /// # Panics
    /// Panics when the next token can't start a primary expression or when the closing
    /// parenthesis of a parenthesized expression is missing.
    fn parse_primary(&mut self) -> Expr {
        match self.next() {
            Token::I64(value) => Expr::I64(value),
            Token::Ident(ident) => Expr::Ident(ident),
            Token::Sub => {
                // A leading minus negates the primary expression that follows it
                let operand = self.parse_primary();
                Expr::UnOp(UnOpType::Neg, Box::new(operand))
            }
            Token::LParen => {
                // `"(" expr ")"`: a complete expression is nested inside the parentheses
                let inner = self.parse_expr();
                match self.next() {
                    Token::RParen => inner,
                    // Without the matching closing parenthesis this is a syntax error
                    _ => panic!("Error parsing primary expr: Missing closing parenthesis ')'"),
                }
            }
            other => panic!("Error parsing primary expr: Unexpected Token '{:?}'", other),
        }
    }

    /// Look at the upcoming Token without consuming it. Yields `Token::EoF` when the token
    /// stream is exhausted
    fn peek(&mut self) -> &Token {
        match self.tokens.peek() {
            Some(token) => token,
            None => &Token::EoF,
        }
    }

    /// Consume the upcoming Token and return it. Yields `Token::EoF` when the token stream is
    /// exhausted
    fn next(&mut self) -> Token {
        match self.tokens.next() {
            Some(token) => token,
            None => Token::EoF,
        }
    }
}
/// Parse the given tokens into an abstract syntax tree.
///
/// # Panics
/// Panics when the tokens do not form a valid program, for example on an unexpected token or a
/// missing closing parenthesis.
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
    Parser::new(tokens).parse()
}
impl BinOpType {
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
///
/// The operator precedences are derived from the C language operator precedences. While not all
/// C operators are included or the exact same, the precedence oder is the same.
/// See: https://en.cppreference.com/w/c/language/operator_precedence
fn precedence(&self) -> u8 {
match self {
BinOpType::Assign => 0,
BinOpType::BOr => 1,
BinOpType::BXor => 2,
BinOpType::BAnd => 3,
BinOpType::Equ | BinOpType::Neq => 4,
BinOpType::Gt | BinOpType::Ge | BinOpType::Lt | BinOpType::Le => 5,
BinOpType::Shl | BinOpType::Shr => 6,
BinOpType::Add | BinOpType::Sub => 7,
BinOpType::Mul | BinOpType::Div | BinOpType::Mod => 8,
}
}
}
#[cfg(test)]
mod tests {
    use super::{parse, Ast, BinOpType, Expr, Stmt};
    use crate::lexer::Token;

    /// Operators with a higher precedence must bind first and operators with the same
    /// precedence must be grouped to the left.
    #[test]
    fn test_parser() {
        // Expression: 1 + 2 * 3 - 4
        // With precedence: (1 + (2 * 3)) - 4
        let tokens = [
            Token::I64(1),
            Token::Add,
            Token::I64(2),
            Token::Mul,
            Token::I64(3),
            Token::Sub,
            Token::I64(4),
        ];

        let expected = Expr::BinOp(
            BinOpType::Sub,
            Expr::BinOp(
                BinOpType::Add,
                Expr::I64(1).into(),
                Expr::BinOp(BinOpType::Mul, Expr::I64(2).into(), Expr::I64(3).into()).into(),
            )
            .into(),
            Expr::I64(4).into(),
        );

        // The single expression is expected to be wrapped as the only statement of the program
        let expected = Ast {
            prog: vec![Stmt::Expr(expected)],
        };

        let actual = parse(tokens);
        assert_eq!(expected, actual);
    }
}