diff --git a/src/lexer.rs b/src/lexer.rs
index ef0b582..a2cb761 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -1,12 +1,20 @@
 use std::{iter::Peekable, str::Chars};
 
+use crate::parser::BinOpType;
+
 #[derive(Debug, PartialEq, Eq)]
 pub enum Token {
+    /// Integer literal (64-bit)
     I64(i64),
 
+    /// Plus (+)
     Add,
 
+    /// Asterisk (*)
     Mul,
+
+    /// End of file
+    EoF,
 }
 
 struct Lexer<'a> {
@@ -24,7 +32,10 @@ impl<'a> Lexer<'a> {
 
         while let Some(ch) = self.next() {
             match ch {
+                // Skip whitespace
                 ' ' => (),
+
+                // Lex numbers
                 '0'..='9' => {
                     let mut sval = String::from(ch);
 
@@ -40,13 +51,12 @@ impl<'a> Lexer<'a> {
 
                 '+' => tokens.push(Token::Add),
                 '*' => tokens.push(Token::Mul),
-                
+
                 //TODO: Don't panic, keep calm
                 _ => panic!("Lexer encountered unexpected char: '{}'", ch),
-
             }
         }
-        
+
         tokens
     }
 
@@ -55,20 +65,34 @@ impl<'a> Lexer<'a> {
         self.code.next()
     }
 
-    /// Shows next character
+    /// Get the next character without removing it
     fn peek(&mut self) -> Option<char> {
         self.code.peek().copied()
     }
 }
 
+/// Lex the provided code into a Token Buffer
+///
+/// TODO: Don't panic and implement error handling using Result
 pub fn lex(code: &str) -> Vec<Token> {
     let mut lexer = Lexer::new(code);
     lexer.lex()
 }
 
+impl Token {
+    /// Convert an operator Token into its binary operator type. Returns None for other Tokens
+    pub fn try_to_binop(&self) -> Option<BinOpType> {
+        Some(match self {
+            Token::Add => BinOpType::Add,
+            Token::Mul => BinOpType::Mul,
+            _ => return None,
+        })
+    }
+}
+
 #[cfg(test)]
 mod tests {
-    use super::{Token, lex};
+    use super::{lex, Token};
 
     #[test]
     fn test_lexer() {
diff --git a/src/lib.rs b/src/lib.rs
index ba0dab2..6054fb1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,2 +1,2 @@
 pub mod lexer;
-
+pub mod parser;
diff --git a/src/main.rs b/src/main.rs
index 5d255e2..0f936c3 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,12 +1,23 @@
-use nek_lang::lexer::lex;
+use nek_lang::{lexer::lex, parser::parse};
 
 
 fn main() {
 
     let code = "33 +5*2";
+    // Should produce ast: Add {
+    //      lhs: I64(33),
+    //      rhs: Mul {
+    //          lhs: I64(5),
+    //          rhs: I64(2)
+    //      }
+    // }
 
     let tokens = lex(code);
 
-    println!("{:?}", tokens);
+    println!("Tokens: {:?}\n", tokens);
+
+    let ast = parse(tokens);
+
+    println!("Ast: {:#?}", ast);
 
 }
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..79c9986
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,113 @@
+use std::iter::Peekable;
+
+use crate::lexer::Token;
+
+/// Types of binary operations supported by the parser
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum BinOpType {
+    /// Addition
+    Add,
+
+    /// Multiplication
+    Mul,
+}
+
+/// Abstract syntax tree produced by the parser
+#[derive(Debug, PartialEq, Eq)]
+pub enum Ast {
+    /// Integer literal (64-bit)
+    I64(i64),
+    /// Binary operation. Consists of type, left hand side and right hand side
+    BinOp(BinOpType, Box<Ast>, Box<Ast>),
+}
+
+/// Expression parser working on a peekable stream of Tokens
+struct Parser<T: Iterator<Item = Token>> {
+    tokens: Peekable<T>,
+}
+
+impl<T: Iterator<Item = Token>> Parser<T> {
+    /// Create a new parser to parse the given Token Stream
+    fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
+        let tokens = tokens.into_iter().peekable();
+        Self { tokens }
+    }
+
+    /// Parse the Token Stream into an Ast. Tokens following the first complete expression
+    /// are left unconsumed
+    fn parse(&mut self) -> Ast {
+        self.parse_expr()
+    }
+
+    /// Parse an expression: a primary expression followed by any number of binary operations
+    fn parse_expr(&mut self) -> Ast {
+        let lhs = self.parse_primary();
+        self.parse_expr_precedence(lhs, 0)
+    }
+
+    /// Extend `lhs` with all following binary operations whose operator has a precedence of
+    /// at least `min_prec` (precedence climbing)
+    fn parse_expr_precedence(&mut self, mut lhs: Ast, min_prec: u8) -> Ast {
+        while let Some(binop) = self.peek().try_to_binop() {
+            if binop.precedence() < min_prec {
+                break;
+            }
+
+            // Remove the operator Token that was just peeked
+            self.next();
+
+            let mut rhs = self.parse_primary();
+
+            // Stronger binding operators to the right claim `rhs` as their left hand side
+            while let Some(binop2) = self.peek().try_to_binop() {
+                if binop2.precedence() <= binop.precedence() {
+                    break;
+                }
+
+                rhs = self.parse_expr_precedence(rhs, binop.precedence() + 1);
+            }
+
+            lhs = Ast::BinOp(binop, lhs.into(), rhs.into());
+        }
+
+        lhs
+    }
+
+    /// Parse a primary expression (for now only number). Panics on any other Token
+    fn parse_primary(&mut self) -> Ast {
+        match self.next() {
+            Token::I64(val) => Ast::I64(val),
+
+            tok => panic!("Error parsing primary expr: Unexpected Token '{:?}'", tok),
+        }
+    }
+
+    /// Get the next Token without removing it
+    fn peek(&mut self) -> &Token {
+        self.tokens.peek().unwrap_or(&Token::EoF)
+    }
+
+    /// Advance to next Token and return the removed Token
+    fn next(&mut self) -> Token {
+        self.tokens.next().unwrap_or(Token::EoF)
+    }
+}
+
+/// Parse the provided Tokens into an Ast
+///
+/// TODO: Don't panic and implement error handling using Result
+pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
+    let mut parser = Parser::new(tokens);
+    parser.parse()
+}
+
+impl BinOpType {
+    /// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
+    /// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
+    fn precedence(&self) -> u8 {
+        match self {
+            BinOpType::Add => 0,
+            BinOpType::Mul => 1,
+        }
+    }
+}