nek-lang/src/parser.rs

use std::iter::Peekable;

use crate::lexer::Token;

/// Types for binary operators
///
/// How strongly each operator binds relative to the others is defined by
/// `BinOpType::precedence`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum BinOpType {
    /// Addition
    Add,

    /// Subtraction
    Sub,

    /// Multiplication
    Mul,

    /// Division
    Div,

    /// Modulo
    Mod,

    /// Bitwise OR (inclusive or)
    BOr,

    /// Bitwise And
    BAnd,

    /// Bitwise Xor (exclusive or)
    BXor,

    /// Shift Left
    Shl,

    /// Shift Right
    Shr,

    /// Check equality
    Equ,

    /// Check inequality
    Neq,

    /// Check greater than
    Gt,

    /// Check greater or equal
    Ge,

    /// Check less than
    Lt,

    /// Check less or equal
    Le,

    /// Assign to a variable
    Assign,
}

/// Types for unary operators
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum UnOpType {
    /// Negation. The parser produces this for a `Token::Sub` in front of a primary expression.
    Neg,
}

/// Result of parsing a token stream: the whole program as a list of statements
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Ast {
    /// Statements of the program, in the order in which they were parsed
    pub prog: Vec<Stmt>,
}

/// A single statement of the program
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Stmt {
    /// An expression used as a statement
    Expr(Expr),
    /// Let statement. Consists of the variable name and the expression on the right hand side
    Let(String, Expr),
}

/// Expression tree node. Operands of operations are boxed sub-expressions.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Expr {
    /// Integer literal (64-bit)
    I64(i64),
    /// Identifier (variable name)
    Ident(String),
    /// Binary operation. Consists of type, left hand side and right hand side
    BinOp(BinOpType, Box<Expr>, Box<Expr>),
    /// Unary operation. Consists of type and the value that is operated on
    UnOp(UnOpType, Box<Expr>),
}

/*
## Grammar
### Expressions
expr_primary = LITERAL | IDENT | "(" expr ")" | "-" expr_primary
expr_mul = expr_primary (("*" | "/" | "%") expr_primary)*
expr_add = expr_mul (("+" | "-") expr_mul)*
expr_shift = expr_add ((">>" | "<<") expr_add)*
expr_rel = expr_shift ((">" | ">=" | "<" | "<=") expr_shift)*
expr_equ = expr_rel (("==" | "!=") expr_rel)*
expr_band = expr_equ ("&" expr_equ)*
expr_bxor = expr_band ("^" expr_band)*
expr_bor = expr_bxor ("|" expr_bxor)*
expr = expr_bor

### Statements
stmt_expr = expr
stmt_let = "let" IDENT "=" expr
stmt = (stmt_expr | stmt_let) (";")*
*/

/// Parser state: the token stream wrapped in a `Peekable`, so that the upcoming token can be
/// inspected without consuming it.
struct Parser<T: Iterator<Item = Token>> {
    /// Tokens that have not been consumed yet
    tokens: Peekable<T>,
}

impl<T: Iterator<Item = Token>> Parser<T> {
    /// Build a parser over anything that can be turned into the token iterator `T`.
    fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
        Self {
            tokens: tokens.into_iter().peekable(),
        }
    }

    /// Parse statements until the end of the token stream and collect them into an [`Ast`].
    ///
    /// Semicolons between statements are consumed and otherwise ignored.
    ///
    /// # Panics
    /// Panics if a let statement or an expression is malformed.
    fn parse(&mut self) -> Ast {
        let mut prog = Vec::new();

        loop {
            match self.peek() {
                Token::EoF => break,
                Token::Semicolon => {
                    // Separator only, nothing to record
                    self.next();
                }
                Token::Let => {
                    let stmt = self.parse_let_stmt();
                    prog.push(stmt);
                }
                // Everything else is treated as the start of an expression statement
                _ => {
                    let expr = self.parse_expr();
                    prog.push(Stmt::Expr(expr));
                }
            }
        }

        Ast { prog }
    }

    /// Parse a let statement: `"let" IDENT "=" expr`.
    ///
    /// # Panics
    /// Panics if any of the three leading tokens is not the expected one, or if the right hand
    /// side is not a valid expression.
    fn parse_let_stmt(&mut self) -> Stmt {
        match self.next() {
            Token::Let => {}
            _ => panic!("Error parsing let: Expected let token"),
        }

        let name = if let Token::Ident(name) = self.next() {
            name
        } else {
            panic!("Error parsing let: Expected identifier after let")
        };

        match self.next() {
            Token::Assign => {}
            _ => panic!("Error parsing let: Expected assignment token"),
        }

        let value = self.parse_expr();
        Stmt::Let(name, value)
    }

    /// Parse a full expression: a primary expression followed by any number of binary operations.
    fn parse_expr(&mut self) -> Expr {
        let first = self.parse_primary();
        self.parse_expr_precedence(first, 0)
    }

    /// Extend `lhs` with every following binary operation whose operator has a precedence of at
    /// least `min_prec`, and return the combined expression.
    ///
    /// Operators of equal precedence are grouped from left to right.
    fn parse_expr_precedence(&mut self, mut lhs: Expr, min_prec: u8) -> Expr {
        loop {
            // Only continue while the upcoming token is a binary operator that binds strongly
            // enough for this level
            let op = match self.peek().try_to_binop() {
                Some(op) if op.precedence() >= min_prec => op,
                _ => break,
            };
            let op_prec = op.precedence();

            // The operator was only peeked so far, consume its token now
            self.next();

            let mut rhs = self.parse_primary();

            // As long as a stronger binding operator follows, it belongs to the right hand side
            while self
                .peek()
                .try_to_binop()
                .map_or(false, |following| following.precedence() > op_prec)
            {
                rhs = self.parse_expr_precedence(rhs, op_prec + 1);
            }

            lhs = Expr::BinOp(op, Box::new(lhs), Box::new(rhs));
        }

        lhs
    }

    /// Parse a primary expression: an integer literal, an identifier, a parenthesized expression
    /// or a negated primary expression.
    ///
    /// # Panics
    /// Panics on any other token, or if a parenthesized expression is not closed.
    fn parse_primary(&mut self) -> Expr {
        match self.next() {
            Token::I64(val) => Expr::I64(val),

            Token::Ident(name) => Expr::Ident(name),

            Token::Sub => {
                let operand = self.parse_primary();
                Expr::UnOp(UnOpType::Neg, Box::new(operand))
            }

            Token::LParen => {
                // Inside the parentheses a complete expression is allowed again
                let inner = self.parse_expr();

                // The expression must be followed by the matching closing parenthesis
                match self.next() {
                    Token::RParen => inner,
                    _ => panic!("Error parsing primary expr: Missing closing parenthesis ')'"),
                }
            }

            tok => panic!("Error parsing primary expr: Unexpected Token '{:?}'", tok),
        }
    }

    /// Look at the upcoming token without consuming it. Yields `Token::EoF` once the stream is
    /// exhausted.
    fn peek(&mut self) -> &Token {
        self.tokens.peek().unwrap_or(&Token::EoF)
    }

    /// Consume and return the upcoming token. Yields `Token::EoF` once the stream is exhausted.
    fn next(&mut self) -> Token {
        self.tokens.next().unwrap_or(Token::EoF)
    }
}

/// Parse the given tokens into an [`Ast`].
///
/// # Panics
/// Panics if the tokens do not form a valid program.
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
    Parser::new(tokens).parse()
}

impl BinOpType {
    /// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
    /// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
    ///
    /// The operator precedences are derived from the C language operator precedences. While not all
    /// C operators are included or the exact same, the precedence oder is the same.
    /// See: https://en.cppreference.com/w/c/language/operator_precedence
    fn precedence(&self) -> u8 {
        match self {
            BinOpType::Assign => 0,
            BinOpType::BOr => 1,
            BinOpType::BXor => 2,
            BinOpType::BAnd => 3,
            BinOpType::Equ | BinOpType::Neq => 4,
            BinOpType::Gt | BinOpType::Ge | BinOpType::Lt | BinOpType::Le => 5,
            BinOpType::Shl | BinOpType::Shr => 6,
            BinOpType::Add | BinOpType::Sub => 7,
            BinOpType::Mul | BinOpType::Div | BinOpType::Mod => 8,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::{parse, Expr, BinOpType};
    use crate::{lexer::Token, parser::{Stmt, Ast}};

    /// Checks that operator precedence and the left-to-right grouping of equally strong
    /// operators are reflected in the produced tree.
    #[test]
    fn test_parser() {
        // Expression: 1 + 2 * 3 - 4
        // With precedence: (1 + (2 * 3)) - 4
        let tokens = [
            Token::I64(1),
            Token::Add,
            Token::I64(2),
            Token::Mul,
            Token::I64(3),
            Token::Sub,
            Token::I64(4),
        ];

        let expected = Expr::BinOp(
            BinOpType::Sub,
            Expr::BinOp(
                BinOpType::Add,
                Expr::I64(1).into(),
                Expr::BinOp(BinOpType::Mul, Expr::I64(2).into(), Expr::I64(3).into()).into(),
            )
            .into(),
            Expr::I64(4).into(),
        );

        let expected = Ast { prog: vec![Stmt::Expr(expected)] };

        let actual = parse(tokens);
        assert_eq!(expected, actual);
    }
}