Start implementing Parser
- Implemented Pratt parser for expressions
- Implemented BinOps Add, Mul
This commit is contained in:
parent
02f63ad9ad
commit
541d905551
33
src/lexer.rs
33
src/lexer.rs
@ -1,12 +1,20 @@
|
||||
use std::{iter::Peekable, str::Chars};
|
||||
|
||||
use crate::parser::BinOpType;
|
||||
|
||||
/// A single lexical unit of the source text.
#[derive(Debug, PartialEq, Eq)]
pub enum Token {
    /// A 64-bit signed integer literal, carrying its value.
    I64(i64),

    /// The `+` sign.
    Add,

    /// The `*` sign.
    Mul,

    /// Marks the end of the input.
    EoF,
}
|
||||
|
||||
struct Lexer<'a> {
|
||||
@ -24,7 +32,10 @@ impl<'a> Lexer<'a> {
|
||||
|
||||
while let Some(ch) = self.next() {
|
||||
match ch {
|
||||
// Skip whitespace
|
||||
' ' => (),
|
||||
|
||||
// Lex numbers
|
||||
'0'..='9' => {
|
||||
let mut sval = String::from(ch);
|
||||
|
||||
@ -40,13 +51,12 @@ impl<'a> Lexer<'a> {
|
||||
|
||||
'+' => tokens.push(Token::Add),
|
||||
'*' => tokens.push(Token::Mul),
|
||||
|
||||
|
||||
//TODO: Don't panic, keep calm
|
||||
_ => panic!("Lexer encountered unexpected char: '{}'", ch),
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
tokens
|
||||
}
|
||||
|
||||
@ -55,20 +65,33 @@ impl<'a> Lexer<'a> {
|
||||
self.code.next()
|
||||
}
|
||||
|
||||
/// Shows next character
|
||||
/// Get the next character without removing it
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
self.code.peek().copied()
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex the provided code into a Token Buffer
|
||||
///
|
||||
/// TODO: Don't panic and implement error handling using Result
|
||||
pub fn lex(code: &str) -> Vec<Token> {
|
||||
let mut lexer = Lexer::new(code);
|
||||
lexer.lex()
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub fn try_to_binop(&self) -> Option<BinOpType> {
|
||||
Some(match self {
|
||||
Token::Add => BinOpType::Add,
|
||||
Token::Mul => BinOpType::Mul,
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{Token, lex};
|
||||
use super::{lex, Token};
|
||||
|
||||
#[test]
|
||||
fn test_lexer() {
|
||||
|
||||
@ -1,2 +1,2 @@
|
||||
pub mod lexer;
|
||||
|
||||
pub mod parser;
|
||||
|
||||
15
src/main.rs
15
src/main.rs
@ -1,12 +1,23 @@
|
||||
use nek_lang::lexer::lex;
|
||||
use nek_lang::{lexer::lex, parser::parse};
|
||||
|
||||
|
||||
fn main() {
|
||||
|
||||
let code = "33 +5*2";
|
||||
// Should produce ast: Add {
|
||||
// lhs: I64(33),
|
||||
// rhs: Mul: {
|
||||
// lhs: I64(5),
|
||||
// rhs: I64(2)
|
||||
// }
|
||||
// }
|
||||
|
||||
let tokens = lex(code);
|
||||
|
||||
println!("{:?}", tokens);
|
||||
println!("Tokens: {:?}\n", tokens);
|
||||
|
||||
let ast = parse(tokens);
|
||||
|
||||
println!("Ast: {:#?}", ast);
|
||||
|
||||
}
|
||||
|
||||
102
src/parser.rs
Normal file
102
src/parser.rs
Normal file
@ -0,0 +1,102 @@
|
||||
use std::iter::Peekable;
|
||||
|
||||
use crate::lexer::Token;
|
||||
|
||||
/// The kind of a binary operation.
///
/// The enum has no payload, so it is cheap to copy and can be compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinOpType {
    /// Addition
    Add,

    /// Multiplication
    Mul,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Ast {
|
||||
/// Integer literal (64-bit)
|
||||
I64(i64),
|
||||
/// Binary operation. Consists of type, left hand side and right hand side
|
||||
BinOp(BinOpType, Box<Ast>, Box<Ast>),
|
||||
}
|
||||
|
||||
struct Parser<T: Iterator<Item = Token>> {
|
||||
tokens: Peekable<T>,
|
||||
}
|
||||
|
||||
impl<T: Iterator<Item = Token>> Parser<T> {
|
||||
/// Create a new parser to parse the given Token Stream
|
||||
fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
|
||||
let tokens = tokens.into_iter().peekable();
|
||||
Self { tokens }
|
||||
}
|
||||
|
||||
fn parse(&mut self) -> Ast {
|
||||
self.parse_expr()
|
||||
}
|
||||
|
||||
fn parse_expr(&mut self) -> Ast {
|
||||
let lhs = self.parse_primary();
|
||||
self.parse_expr_precedence(lhs, 0)
|
||||
}
|
||||
|
||||
fn parse_expr_precedence(&mut self, mut lhs: Ast, min_prec: u8) -> Ast {
|
||||
while let Some(binop) = &self.peek().try_to_binop() {
|
||||
if !(binop.precedence() >= min_prec) {
|
||||
break;
|
||||
}
|
||||
|
||||
// The while condition already verified that this is some while peeking, so unwrap is
|
||||
// valid
|
||||
let binop = self.next().try_to_binop().unwrap();
|
||||
|
||||
let mut rhs = self.parse_primary();
|
||||
|
||||
while let Some(binop2) = &self.peek().try_to_binop() {
|
||||
if !(binop2.precedence() > binop.precedence()) {
|
||||
break;
|
||||
}
|
||||
|
||||
rhs = self.parse_expr_precedence(rhs, binop.precedence() + 1);
|
||||
}
|
||||
|
||||
lhs = Ast::BinOp(binop, lhs.into(), rhs.into());
|
||||
}
|
||||
|
||||
lhs
|
||||
}
|
||||
|
||||
/// Parse a primary expression (for now only number)
|
||||
fn parse_primary(&mut self) -> Ast {
|
||||
match self.next() {
|
||||
Token::I64(val) => Ast::I64(val),
|
||||
|
||||
tok => panic!("Error parsing primary expr: Unexpected Token '{:?}'", tok),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the next Token without removing it
|
||||
fn peek(&mut self) -> &Token {
|
||||
self.tokens.peek().unwrap_or(&Token::EoF)
|
||||
}
|
||||
|
||||
/// Advance to next Token and return the removed Token
|
||||
fn next(&mut self) -> Token {
|
||||
self.tokens.next().unwrap_or(Token::EoF)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
|
||||
let mut parser = Parser::new(tokens);
|
||||
parser.parse()
|
||||
}
|
||||
|
||||
impl BinOpType {
|
||||
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
|
||||
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
|
||||
fn precedence(&self) -> u8 {
|
||||
match self {
|
||||
BinOpType::Add => 0,
|
||||
BinOpType::Mul => 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user