Add comments & small additions

- Added more & better comments for `token.rs`, `lexer.rs`, `parser.rs`
- Implemented HashTag Token for Lexer
- Implemented additional safety checks for the Lexer::read functions
This commit is contained in:
Daniel M 2021-12-28 20:43:24 +01:00
parent 623fa71355
commit cfc585426d
5 changed files with 280 additions and 46 deletions

View File

@ -8,17 +8,18 @@ pub enum BinOpType {
Mul, Mul,
Div, Div,
Mod Mod,
} }
/// Unary Operator Types. For operations that have one operand /// Unary Operator Types. For operations that have one operand
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum UnOpType { pub enum UnOpType {
Neg Neg,
} }
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FnCall { pub struct FnCall {
pub intrinsic: bool,
pub fn_name: String, pub fn_name: String,
pub args: Vec<Expr>, pub args: Vec<Expr>,
} }
@ -39,3 +40,14 @@ pub enum Statement {
LetBinding(String, Expr), LetBinding(String, Expr),
Assignment(String, Expr), Assignment(String, Expr),
} }
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Ast {
pub(crate) prog: Vec<Statement>,
}
impl Ast {
pub fn new(prog: Vec<Statement>) -> Self {
Self { prog }
}
}

View File

@ -1,6 +1,6 @@
use std::collections::HashMap; use std::collections::HashMap;
use crate::{ast::{Statement, Expr}, token::Literal}; use crate::{ast::{Statement, Expr, Ast}, token::Literal};
pub struct Interpreter { pub struct Interpreter {
prog: Vec<Statement>, prog: Vec<Statement>,
@ -13,9 +13,9 @@ pub struct Interpreter {
} }
impl Interpreter { impl Interpreter {
pub fn new(prog: Vec<Statement>) -> Self { pub fn new(prog: Ast) -> Self {
let variables = Default::default(); let variables = Default::default();
Self { prog, variables, debug: true } Self { prog: prog.prog, variables, debug: true }
} }
pub fn run(&mut self) { pub fn run(&mut self) {

View File

@ -4,8 +4,12 @@ use super::token::*;
#[derive(Debug)] #[derive(Debug)]
pub enum LexErrType { pub enum LexErrType {
/// Lexer encountered an invalid character
InvalidCharacter(char), InvalidCharacter(char),
/// While lexing a string an invalid escaped character was encountered. Backslash '\\' followed
/// by the offending character
InvalidEscapeChar(char), InvalidEscapeChar(char),
/// While lexing a string, the closing quote did not occur before file end
MissingQuoteEnd, MissingQuoteEnd,
} }
@ -19,11 +23,17 @@ type LexRes<T> = Result<T, LexErr>;
pub struct Lexer<'a> { pub struct Lexer<'a> {
// code: &'a str, // code: &'a str,
/// Peekable iterator over the sourcecode utf-8 characters together with the byte indices
code_iter: Peekable<CharIndices<'a>>, code_iter: Peekable<CharIndices<'a>>,
/// The char & byte index pair that is currently being evaluated. This character will not be
/// present in the code_iter iterator since it has been removed already.
curr_char: Option<(usize, char)>, curr_char: Option<(usize, char)>,
} }
impl<'a> Lexer<'a> { impl<'a> Lexer<'a> {
/// Create a new Lexer from the given sourcecode string
pub fn new(code: &'a str) -> Self { pub fn new(code: &'a str) -> Self {
let mut code_iter = code.char_indices().peekable(); let mut code_iter = code.char_indices().peekable();
let curr_char = code_iter.next(); let curr_char = code_iter.next();
@ -34,26 +44,31 @@ impl<'a> Lexer<'a> {
} }
} }
/// Lex the sourcecode and produce a TokenStream containing the Tokens represented by the
/// sourcecode. This can fail due to a few lexing errors like encountering unknown / unhandled
/// chars, non terminated quotes and so on. Syntactic errors are not detected at this point.
pub fn tokenize(&mut self) -> LexRes<TokenStream> { pub fn tokenize(&mut self) -> LexRes<TokenStream> {
let mut tokens = Vec::new(); let mut tokens = Vec::new();
loop { // Iterate through the whole sourcecode until EOF is reached
let (_idx, ch) = match self.curr_char { while let Some((_idx, ch)) = self.curr_char {
Some(it) => it,
None => break,
};
// Peek the next char & byte index for matching multi-char tokens
let (_idx_nxt, ch_nxt) = self let (_idx_nxt, ch_nxt) = self
.peek() .peek()
.map(|(a, b)| (Some(a), Some(b))) .map(|(a, b)| (Some(a), Some(b)))
.unwrap_or_default(); .unwrap_or_default();
// Match the current char to decide what Token is represented
match ch { match ch {
// Skip whitespace // Skip whitespace
' ' | '\t' | '\n' | '\r' => (), ' ' | '\t' | '\n' | '\r' => (),
// Lex tokens with 2 char length // Lex tokens with 2 char length. This matches on the current char and also the next
// Double slash '/' is a comment, so skip ahead to the next line
'/' if matches!(ch_nxt, Some('/')) => self.advance_until_new_line(), '/' if matches!(ch_nxt, Some('/')) => self.advance_until_new_line(),
'=' if matches!(ch_nxt, Some('=')) => { '=' if matches!(ch_nxt, Some('=')) => {
self.advance(); self.advance();
tokens.push(Token::Op(Op::Eq)); tokens.push(Token::Op(Op::Eq));
@ -83,7 +98,7 @@ impl<'a> Lexer<'a> {
tokens.push(Token::Op(Op::Or)); tokens.push(Token::Op(Op::Or));
} }
// Lex tokens with 1 char length // Lex tokens with 1 char length. This just matches the current char
'+' => tokens.push(Token::Op(Op::Add)), '+' => tokens.push(Token::Op(Op::Add)),
'-' => tokens.push(Token::Op(Op::Sub)), '-' => tokens.push(Token::Op(Op::Sub)),
'*' => tokens.push(Token::Op(Op::Mul)), '*' => tokens.push(Token::Op(Op::Mul)),
@ -104,14 +119,16 @@ impl<'a> Lexer<'a> {
'.' => tokens.push(Token::Dot), '.' => tokens.push(Token::Dot),
'!' => tokens.push(Token::Op(Op::Not)), '!' => tokens.push(Token::Op(Op::Not)),
'^' => tokens.push(Token::Op(Op::Xor)), '^' => tokens.push(Token::Op(Op::Xor)),
'#' => tokens.push(Token::Hashtag),
// Lex Strings // A quote represents a string start, so lex a string token here
'"' => tokens.push(self.read_string()?), '"' => tokens.push(self.read_string()?),
// Lex numbers // A numeric digit represents a number start, so lex a number here
'0'..='9' => tokens.push(self.read_num()?), '0'..='9' => tokens.push(self.read_num()?),
// Lex identifiers / keywords // An alphabetical char or underscore represents an identifier or keyword start, so
// lex an identifier or keyword here
'a'..='z' | 'A'..='Z' | '_' => tokens.push(self.read_ident_or_keyword()?), 'a'..='z' | 'A'..='Z' | '_' => tokens.push(self.read_ident_or_keyword()?),
// Anything else is an error // Anything else is an error
@ -120,32 +137,47 @@ impl<'a> Lexer<'a> {
} }
} }
// Consume the current token
self.advance(); self.advance();
} }
Ok(TokenStream::new(tokens)) Ok(TokenStream::new(tokens))
} }
/// Get the next char & byte index. Don't consume the current char
fn peek(&mut self) -> Option<&(usize, char)> { fn peek(&mut self) -> Option<&(usize, char)> {
self.code_iter.peek() self.code_iter.peek()
} }
/// Consume the current char and fetch the next
fn advance(&mut self) { fn advance(&mut self) {
self.curr_char = self.code_iter.next(); self.curr_char = self.code_iter.next();
} }
/// Consume all characters until the next line. The last character before the next line is
/// still kept in curr_char to be consumed by the tokenize function.
fn advance_until_new_line(&mut self) { fn advance_until_new_line(&mut self) {
while !matches!(self.curr_char, Some((_, '\n'))) { while !matches!(self.curr_char, Some((_, '\n'))) {
self.advance(); self.advance();
} }
if matches!(self.curr_char, Some((_, '\r'))) { if matches!(self.peek(), Some((_, '\r'))) {
self.advance(); self.advance();
} }
} }
/// Lex a number consisting of one or more digits, starting at the current char. The last digit
/// is kept in curr_char to be consumed by the tokenize function.
fn read_num(&mut self) -> LexRes<Token> { fn read_num(&mut self) -> LexRes<Token> {
match self.curr_char {
Some((_, '0'..='9')) => (),
_ => panic!("Lexer::read_num must not be called without having a digit in curr_char")
}
// The function is only called if the curr_char is the beginning of a number, so curr_char
// is guaranteed to be Some at this point
let mut snum = format!("{}", self.curr_char.unwrap().1); let mut snum = format!("{}", self.curr_char.unwrap().1);
// Append the next chars to the string number until there are no digits anymore
while let Some((_idx, ch)) = self.peek() { while let Some((_idx, ch)) = self.peek() {
match ch { match ch {
'0'..='9' => snum.push(*ch), '0'..='9' => snum.push(*ch),
@ -160,11 +192,24 @@ impl<'a> Lexer<'a> {
Ok(Token::Literal(Literal::Int64(snum.parse().unwrap()))) Ok(Token::Literal(Literal::Int64(snum.parse().unwrap())))
} }
/// Lex a string consisting of any text enclosed by doublequotes with support for backslash
/// escapes. The opening quote must be in curr_char already. The closing quote is kept in
/// curr_char to be consumed by the tokenize function.
fn read_string(&mut self) -> LexRes<Token> { fn read_string(&mut self) -> LexRes<Token> {
match self.curr_char {
Some((_, '"')) => (),
_ => panic!("Lexer::read_string must not be called without having a '\"' in curr_char")
}
let mut text = String::new(); let mut text = String::new();
// If true, the next character is an escaped char. This is set to true, if the last char
// was a backslash
let mut escape = false; let mut escape = false;
loop { loop {
// If the end of the sourcecode is reached while still lexing a string, there must have
// been a quote missing
let (_idx, ch) = match self.peek() { let (_idx, ch) = match self.peek() {
Some(it) => *it, Some(it) => *it,
None => return Err(LexErr::new(LexErrType::MissingQuoteEnd)), None => return Err(LexErr::new(LexErrType::MissingQuoteEnd)),
@ -173,7 +218,7 @@ impl<'a> Lexer<'a> {
if escape { if escape {
match ch { match ch {
'"' | '\\' => text.push(ch), '"' | '\\' => text.push(ch),
'\n' => text.push('\n'), 'n' => text.push('\n'),
'r' => text.push('\r'), 'r' => text.push('\r'),
't' => text.push('\t'), 't' => text.push('\t'),
_ => return Err(LexErr::new(LexErrType::InvalidEscapeChar(ch))), _ => return Err(LexErr::new(LexErrType::InvalidEscapeChar(ch))),
@ -194,7 +239,17 @@ impl<'a> Lexer<'a> {
Ok(Token::Literal(Literal::String(text))) Ok(Token::Literal(Literal::String(text)))
} }
/// Lex an identifier or keyword consisting of alphabetic characters, digits and underscores
/// and starting with an alphabetic character or underscore. The first character is in curr_char
/// and the last character is left in curr_char to be consumed by the tokenize function.
/// If the identifier is a language keyword it is lexed as the appropriate token instead of a
/// general identifier token.
fn read_ident_or_keyword(&mut self) -> LexRes<Token> { fn read_ident_or_keyword(&mut self) -> LexRes<Token> {
match self.curr_char {
Some((_, 'a'..='z' | 'A'..='Z' | '_')) => (),
_ => panic!("Lexer::read_ident_or_keyword must not be called without having a char or '_' in curr_char")
}
let mut ident = format!("{}", self.curr_char.unwrap().1); let mut ident = format!("{}", self.curr_char.unwrap().1);
while let Some((_idx, ch)) = self.peek() { while let Some((_idx, ch)) = self.peek() {
@ -205,6 +260,7 @@ impl<'a> Lexer<'a> {
self.advance(); self.advance();
} }
// Check if the identifier is a language keyword
let token = match ident.as_str() { let token = match ident.as_str() {
"let" => Token::Keyword(Keyword::Let), "let" => Token::Keyword(Keyword::Let),
"if" => Token::Keyword(Keyword::If), "if" => Token::Keyword(Keyword::If),
@ -249,7 +305,7 @@ mod test {
([{)]} ([{)]}
4564 "a string" false true 4564 "a string" false true
an_5ident6 an_5ident6
; : , . ; : , . #
let if while loop else fn return void let if while loop else fn return void
"#; "#;
@ -294,6 +350,7 @@ mod test {
Token::Colon, Token::Colon,
Token::Comma, Token::Comma,
Token::Dot, Token::Dot,
Token::Hashtag,
Token::Keyword(Keyword::Let), Token::Keyword(Keyword::Let),
Token::Keyword(Keyword::If), Token::Keyword(Keyword::If),

View File

@ -1,5 +1,5 @@
use crate::{ use crate::{
ast::{BinOpType, Expr, FnCall, Statement, UnOpType}, ast::{Ast, BinOpType, Expr, FnCall, Statement, UnOpType},
token::{Group, Keyword, Op, Token, TokenStream}, token::{Group, Keyword, Op, Token, TokenStream},
}; };
@ -8,6 +8,22 @@ pub struct ParseErr;
type PRes<T> = Result<T, ParseErr>; type PRes<T> = Result<T, ParseErr>;
/// The Parser contains a TokenStream to be parsed into an Ast (abstract syntax tree).
///
/// ## Grammar
/// ### Statements
/// `stmt_let = "let" ident "=" expr_add` \
/// `stmt_assign = ident "=" expr_add` \
/// `stmt = ( stmt_let | stmt_assign | expr_add ) ";"` \
///
/// ### Expressions
/// `expr_literal = LITERAL` \
/// `expr_fn_call = IDENT "(" expr_add? ( "," expr_add )* ")"` \
/// `expr_variable = IDENT` \
/// `expr_value = expr_literal | expr_fn_call | expr_variable` \
/// `expr_term = "-" expr_term | "(" expr_add ")" | expr_value` \
/// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*` \
/// `expr_add = expr_mul (("+"|"-") expr_mul)*` \
pub struct Parser { pub struct Parser {
tokens: TokenStream, tokens: TokenStream,
} }
@ -16,13 +32,12 @@ pub struct Parser {
# GRAMMAR # GRAMMAR
## expressions ## expressions
ident = IDENT
expr_literal = LITERAL expr_literal = LITERAL
expr_fn_call = ident "(" expr_add? ( "," expr_add )* ")" expr_fn_call = IDENT "(" expr_add? ( "," expr_add )* ")"
expr_varibale = ident expr_varibale = IDENT
expr_value = expr_literal | expr_fn_call | expr_variable expr_value = expr_literal | expr_fn_call | expr_variable
expr_term = "-" expr_term | "(" expr_add ")" | expr_literal expr_term = "-" expr_term | "(" expr_add ")" | expr_value
expr_mul = expr_term (("*"|"/") expr_term)* expr_mul = expr_term (("*"|"/"|"%") expr_term)*
expr_add = expr_mul (("+"|"-") expr_mul)* expr_add = expr_mul (("+"|"-") expr_mul)*
## statements ## statements
@ -31,24 +46,31 @@ stmt_assign = ident "=" expr_add
stmt = ( stmt_let | stmt_assign | expr_add ) ";" stmt = ( stmt_let | stmt_assign | expr_add ) ";"
*/ */
impl Parser { impl Parser {
/// Create a new parser from a TokenStream
pub fn new(tokens: TokenStream) -> Self { pub fn new(tokens: TokenStream) -> Self {
Self { tokens } Self { tokens }
} }
/// Get the current token without consuming it
pub fn curr(&self) -> Option<&Token> { pub fn curr(&self) -> Option<&Token> {
self.tokens.curr() self.tokens.curr()
} }
/// Get the next token without consuming it
pub fn peek(&self) -> Option<&Token> { pub fn peek(&self) -> Option<&Token> {
self.tokens.peek() self.tokens.peek()
} }
/// Advance to the next token, consuming it in the process
pub fn advance(&mut self) -> Option<&Token> { pub fn advance(&mut self) -> Option<&Token> {
self.tokens.advance() self.tokens.advance()
} }
pub fn parse(&mut self) -> PRes<Vec<Statement>> { /// Parse a whole TokenStream into an Ast (abstract syntax tree). A program consists of a
/// sequence of statements.
pub fn parse(&mut self) -> PRes<Ast> {
let mut prog = Vec::new(); let mut prog = Vec::new();
while let Some(tok) = self.curr() { while let Some(tok) = self.curr() {
@ -62,18 +84,29 @@ impl Parser {
} }
} }
Ok(prog) Ok(Ast::new(prog))
} }
/// Parse a statement from the TokenStream. This consists of an expression, a let statement or
/// an assignment.
///
/// ### Grammar
/// `stmt = ( stmt_let | stmt_assign | expr_add ) ";"`
pub fn parse_statement(&mut self) -> PRes<Statement> { pub fn parse_statement(&mut self) -> PRes<Statement> {
// Check the current and next char to decide what kind of statement is being parsed
let stmt = match self.curr() { let stmt = match self.curr() {
// A let token -> Parse a let statement
Some(Token::Keyword(Keyword::Let)) => self.parse_stmt_let(), Some(Token::Keyword(Keyword::Let)) => self.parse_stmt_let(),
// Ident and "=" -> An assignment without declaration (let)
Some(Token::Ident(_)) if matches!(self.peek(), Some(Token::Op(Op::Assign))) => { Some(Token::Ident(_)) if matches!(self.peek(), Some(Token::Op(Op::Assign))) => {
self.parse_stmt_assign() self.parse_stmt_assign()
} }
// Otherwise -> A simple expression
_ => self.parse_expr_add().map(|expr| Statement::Expr(expr)), _ => self.parse_expr_add().map(|expr| Statement::Expr(expr)),
}; };
// Check that the statement is terminated with a semicolon.
// TODO: This is not needed for block based statements like `while expr { ... }`
if !matches!(self.advance(), Some(Token::Semicolon)) { if !matches!(self.advance(), Some(Token::Semicolon)) {
panic!("Expected ';' while parsing statement"); panic!("Expected ';' while parsing statement");
} }
@ -81,20 +114,29 @@ impl Parser {
stmt stmt
} }
/// Parse a let statement from the TokenStream. This consists of a let token, an identifier,
/// an equal sign "=" and an expression.
///
/// ### Grammar
/// `stmt_let = "let" ident "=" expr_add`
pub fn parse_stmt_let(&mut self) -> PRes<Statement> { pub fn parse_stmt_let(&mut self) -> PRes<Statement> {
// Check if the let token is there
if !matches!(self.advance(), Some(Token::Keyword(Keyword::Let))) { if !matches!(self.advance(), Some(Token::Keyword(Keyword::Let))) {
panic!("Unexpected token while parsing let statement. Expected 'let'"); panic!("Unexpected token while parsing let statement. Expected 'let'");
} }
// Fetch the variable name
let var_name = match self.advance() { let var_name = match self.advance() {
Some(Token::Ident(ident)) => ident.clone(), Some(Token::Ident(ident)) => ident.clone(),
_ => panic!("Unexpected token while parsing let statement. Expected ident"), _ => panic!("Unexpected token while parsing let statement. Expected ident"),
}; };
// Check if the equal sign is present
if !matches!(self.advance(), Some(Token::Op(Op::Assign))) { if !matches!(self.advance(), Some(Token::Op(Op::Assign))) {
panic!("Unexpected token while parsing let statement. Expected '='"); panic!("Unexpected token while parsing let statement. Expected '='");
} }
// Parse the right hand side of the let statement
let rhs = self.parse_expr_add()?; let rhs = self.parse_expr_add()?;
let let_binding = Statement::LetBinding(var_name, rhs); let let_binding = Statement::LetBinding(var_name, rhs);
@ -102,16 +144,24 @@ impl Parser {
Ok(let_binding) Ok(let_binding)
} }
/// Parse an assignment statement from the TokenStream. This consists of a an identifier, an
/// equal sign "=" and an expression.
///
/// ### Grammar
/// `stmt_assign = ident "=" expr_add`
pub fn parse_stmt_assign(&mut self) -> PRes<Statement> { pub fn parse_stmt_assign(&mut self) -> PRes<Statement> {
// Fetch the variable name
let var_name = match self.advance() { let var_name = match self.advance() {
Some(Token::Ident(ident)) => ident.clone(), Some(Token::Ident(ident)) => ident.clone(),
_ => panic!("Unexpected token while parsing assignment statement. Expected ident"), _ => panic!("Unexpected token while parsing assignment statement. Expected ident"),
}; };
// Check that the equal sign is present
if !matches!(self.advance(), Some(Token::Op(Op::Assign))) { if !matches!(self.advance(), Some(Token::Op(Op::Assign))) {
panic!("Unexpected token while parsing let assignment. Expected '='"); panic!("Unexpected token while parsing let assignment. Expected '='");
} }
// Parse the right hand side of the assignment
let rhs = self.parse_expr_add()?; let rhs = self.parse_expr_add()?;
let let_binding = Statement::Assignment(var_name, rhs); let let_binding = Statement::Assignment(var_name, rhs);
@ -119,14 +169,26 @@ impl Parser {
Ok(let_binding) Ok(let_binding)
} }
/// The main expression parsing function. This can be a multiplication expression and 0 or more
/// further multiplication expressions separated by addition precedence operators (add '+',
/// sub '-').
///
/// Add is the operator with the lowest precedence which is why this recursively handles all
/// other kinds of expressions.
///
/// ### Grammar
/// `expr_add = expr_mul (("+"|"-") expr_mul)*`
pub fn parse_expr_add(&mut self) -> PRes<Expr> { pub fn parse_expr_add(&mut self) -> PRes<Expr> {
let mut a = self.parse_expr_mul()?; // Parse the left hand side / the main expression if there is nothing on the right
let mut lhs = self.parse_expr_mul()?;
// Parse 0 or more expressions to the right side of the add operators
while matches!(self.curr(), Some(Token::Op(Op::Add | Op::Sub))) { while matches!(self.curr(), Some(Token::Op(Op::Add | Op::Sub))) {
// We successfully matched curr against Some already in the while condition, so unwrap is fine // We successfully matched curr against Some already in the while condition, so unwrap
// is fine
let tok_op = self.advance().unwrap().clone(); let tok_op = self.advance().unwrap().clone();
let b = self.parse_expr_mul()?; let rhs = self.parse_expr_mul()?;
let op_type = match tok_op { let op_type = match tok_op {
Token::Op(Op::Add) => BinOpType::Add, Token::Op(Op::Add) => BinOpType::Add,
@ -134,15 +196,23 @@ impl Parser {
_ => unreachable!(), _ => unreachable!(),
}; };
a = Expr::BinOp(op_type, a.into(), b.into()); lhs = Expr::BinOp(op_type, lhs.into(), rhs.into());
} }
Ok(a) Ok(lhs)
} }
/// Parse a multiplication expression from the TokenStream. This can be a term and 0 or more
/// further terms separated by multiplication precedence operators (multiply '*', divide '/',
/// modulo '%')
///
/// ### Grammar
/// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*`
pub fn parse_expr_mul(&mut self) -> PRes<Expr> { pub fn parse_expr_mul(&mut self) -> PRes<Expr> {
let mut a = self.parse_expr_term()?; // Parse the left hand side / the main expression if there is nothing on the right
let mut lhs = self.parse_expr_term()?;
// Parse 0 or more expressions to the right side of the mul operators
while matches!(self.curr(), Some(Token::Op(Op::Mul | Op::Div | Op::Mod))) { while matches!(self.curr(), Some(Token::Op(Op::Mul | Op::Div | Op::Mod))) {
// We successfully matched curr against Some already in the while condition, so unwrap is fine // We successfully matched curr against Some already in the while condition, so unwrap is fine
let tok_op = self.advance().unwrap().clone(); let tok_op = self.advance().unwrap().clone();
@ -156,31 +226,52 @@ impl Parser {
_ => unreachable!(), _ => unreachable!(),
}; };
a = Expr::BinOp(op_type, a.into(), b.into()); lhs = Expr::BinOp(op_type, lhs.into(), b.into());
} }
Ok(a) Ok(lhs)
} }
/// Parse a term expression from the TokenStream. This can be the negation of a term, an add
/// expression enclosed by parentheses or a value.
///
/// ### Grammar
/// `expr_term = "-" expr_term | "(" expr_add ")" | expr_value`
pub fn parse_expr_term(&mut self) -> PRes<Expr> { pub fn parse_expr_term(&mut self) -> PRes<Expr> {
let term = match self.curr() { let term = match self.curr() {
// Current token is an opening parentheses '(' -> Must be an enclosed expr_add
Some(Token::Open(Group::Paren)) => { Some(Token::Open(Group::Paren)) => {
// Skip the '('
self.advance(); self.advance();
let a = self.parse_expr_add()?;
let expr = self.parse_expr_add()?;
// After the expression must be closing parentheses ')'
if !matches!(self.advance(), Some(Token::Close(Group::Paren))) { if !matches!(self.advance(), Some(Token::Close(Group::Paren))) {
panic!("Missing closing parentheses while parsing term"); panic!("Missing closing parentheses while parsing term");
} }
a
expr
} }
// Current token is a minus '-' -> Must be a negated expr_term
Some(Token::Op(Op::Sub)) => { Some(Token::Op(Op::Sub)) => {
// Skip the '-'
self.advance(); self.advance();
// Parse an expr_term in a Negation Node
Expr::UnOp(UnOpType::Neg, self.parse_expr_term()?.into()) Expr::UnOp(UnOpType::Neg, self.parse_expr_term()?.into())
} }
// Nothing special in the current -> Must be an expr_value
_ => self.parse_expr_value()?, _ => self.parse_expr_value()?,
}; };
Ok(term) Ok(term)
} }
/// Parse a value expression from the TokenStream. This can be a literal value, a function call
/// or a variable.
///
/// ### Grammar
/// `expr_value = expr_literal | expr_fn_call | expr_variable`
pub fn parse_expr_value(&mut self) -> PRes<Expr> { pub fn parse_expr_value(&mut self) -> PRes<Expr> {
match self.curr() { match self.curr() {
Some(Token::Literal(_)) => self.parse_expr_literal(), Some(Token::Literal(_)) => self.parse_expr_literal(),
@ -192,37 +283,62 @@ impl Parser {
} }
} }
/// Parse a function call from the TokenStream. This consists of an identifier and 0 or more
/// add expressions enclosed by parentheses '(', ')' and separated by commas ',' .
///
/// ### Grammar
/// `expr_fn_call = IDENT "(" expr_add? ( "," expr_add )* ")"`
pub fn parse_expr_fn_call(&mut self) -> PRes<Expr> { pub fn parse_expr_fn_call(&mut self) -> PRes<Expr> {
// The first 2 checks are not really necessary for internal calls since parse_expr_value // The first 2 checks are not really necessary for internal calls since parse_expr_value
// verifies the tokens already // verifies the tokens already
// Get the function name
let fn_name = match self.advance() { let fn_name = match self.advance() {
Some(Token::Ident(ident)) => ident.clone(), Some(Token::Ident(ident)) => ident.clone(),
_ => panic!("Unexpected token while parsing function call. Expected identifier"), _ => panic!("Unexpected token while parsing function call. Expected identifier"),
}; };
// Check that there really is an opening parentheses
if !matches!(self.advance(), Some(Token::Open(Group::Paren))) { if !matches!(self.advance(), Some(Token::Open(Group::Paren))) {
panic!("Unexpected token while parsing function call. Expected '('"); panic!("Unexpected token while parsing function call. Expected '('");
} }
let mut args = Vec::new(); let mut args = Vec::new();
// If there is not a closing parentheses directly after the opening "()", parse at least
// one add expression
// TODO: This is *suboptimal* code // TODO: This is *suboptimal* code
if !matches!(self.curr(), Some(Token::Close(Group::Paren))) { if !matches!(self.curr(), Some(Token::Close(Group::Paren))) {
args.push(self.parse_expr_add()?); args.push(self.parse_expr_add()?);
// As long as there are commas after the expressions, parse more expressions as
// parameters
while matches!(self.curr(), Some(Token::Comma)) { while matches!(self.curr(), Some(Token::Comma)) {
self.advance(); self.advance();
args.push(self.parse_expr_add()?); args.push(self.parse_expr_add()?);
} }
} }
// Check if there really is a closing parentheses
if !matches!(self.advance(), Some(Token::Close(Group::Paren))) { if !matches!(self.advance(), Some(Token::Close(Group::Paren))) {
panic!("Unexpected token while parsing function call. Expected '('"); panic!("Unexpected token while parsing function call. Expected '('");
} }
Ok(Expr::FnCall(FnCall { fn_name, args })) // By default don't parse as an intrinsic function
let intrinsic = false;
Ok(Expr::FnCall(FnCall {
intrinsic,
fn_name,
args,
}))
} }
/// Parse a variable name value. This consists of an identifier without parentheses afterwards.
/// The identifier represents the variable name.
///
/// ### Grammar
/// `expr_variable = IDENT`
pub fn parse_expr_varibale(&mut self) -> PRes<Expr> { pub fn parse_expr_varibale(&mut self) -> PRes<Expr> {
match self.advance() { match self.advance() {
Some(Token::Ident(ident)) => Ok(Expr::Variable(ident.clone())), Some(Token::Ident(ident)) => Ok(Expr::Variable(ident.clone())),
@ -230,6 +346,10 @@ impl Parser {
} }
} }
/// Parse a literal value. This consists of a literal token.
///
/// ### Grammar
/// `expr_literal = LITERAL`
pub fn parse_expr_literal(&mut self) -> PRes<Expr> { pub fn parse_expr_literal(&mut self) -> PRes<Expr> {
match self.advance() { match self.advance() {
Some(Token::Literal(lit)) => Ok(Expr::Literal(lit.clone())), Some(Token::Literal(lit)) => Ok(Expr::Literal(lit.clone())),
@ -308,6 +428,7 @@ mod tests {
Box::new(Expr::UnOp( Box::new(Expr::UnOp(
UnOpType::Neg, UnOpType::Neg,
Expr::FnCall(FnCall { Expr::FnCall(FnCall {
intrinsic: false,
fn_name, fn_name,
args: vec![Expr::Literal(Literal::Int64(9))], args: vec![Expr::Literal(Literal::Int64(9))],
}) })

View File

@ -4,41 +4,66 @@ use std::{fmt::Display, borrow::Cow};
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Op { pub enum Op {
// Addition // Addition
/// Add "+"
Add, Add,
/// Subtract "-"
Sub, Sub,
// Multiplications // Multiplications
/// Multiply "*"
Mul, Mul,
/// Divide "/"
Div, Div,
/// Modulo "%"
Mod, Mod,
// Assignment /// Assignment "="
Assign, Assign,
// Equality // Equality
/// Equal "=="
Eq, Eq,
/// Not equal "!="
Neq, Neq,
/// Greater than ">"
Gt, Gt,
/// Lesser than "<"
Lt, Lt,
/// Greater or equal ">="
Ge, Ge,
/// Lesser or equal "<="
Le, Le,
// Bool // Boolean
/// And "&&"
And, And,
/// Or "||"
Or, Or,
/// Not "!"
Not, Not,
/// Xor "^"
Xor, Xor,
/// Arrow "->"
Arrow, Arrow,
} }
/// Different types of parentheses
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Group { pub enum Group {
/// Parentheses "(" | ")"
Paren, Paren,
/// Brackets "[" | "]"
Bracket, Bracket,
/// Braces "{" | "}"
Braces, Braces,
} }
/// Literal values for the different datatypes
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Literal { pub enum Literal {
Boolean(bool), Boolean(bool),
@ -46,6 +71,7 @@ pub enum Literal {
String(String), String(String),
} }
/// Language Keywords
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Keyword { pub enum Keyword {
Let, Let,
@ -58,23 +84,35 @@ pub enum Keyword {
Void, Void,
} }
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Token { pub enum Token {
/// Literal values
Literal(Literal), Literal(Literal),
/// Operators
Op(Op), Op(Op),
/// Opening parentheses
Open(Group), Open(Group),
/// Closing parentheses
Close(Group), Close(Group),
/// Identifier
Ident(String), Ident(String),
/// Language keywords
Keyword(Keyword), Keyword(Keyword),
/// Semicolon ";"
Semicolon, Semicolon,
/// Colon ":"
Colon, Colon,
/// Comma ","
Comma, Comma,
/// Dot "."
Dot, Dot,
/// Hashtag "#"
Hashtag,
} }
/// A token buffer with an index for iterating over the tokens
pub struct TokenStream { pub struct TokenStream {
tokens: Vec<Token>, tokens: Vec<Token>,
idx: usize, idx: usize,
@ -83,7 +121,8 @@ pub struct TokenStream {
impl Display for Token { impl Display for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let op: Cow<'static, str> = match self { // String representation of the Token
let stok: Cow<'static, str> = match self {
Token::Op(Op::Add) => "+".into(), Token::Op(Op::Add) => "+".into(),
Token::Op(Op::Sub) => "-".into(), Token::Op(Op::Sub) => "-".into(),
@ -123,6 +162,7 @@ impl Display for Token {
Token::Colon => ":".into(), Token::Colon => ":".into(),
Token::Comma => ",".into(), Token::Comma => ",".into(),
Token::Dot => ".".into(), Token::Dot => ".".into(),
Token::Hashtag => "#".into(),
Token::Keyword(Keyword::Let) => "let".into(), Token::Keyword(Keyword::Let) => "let".into(),
Token::Keyword(Keyword::If) => "if".into(), Token::Keyword(Keyword::If) => "if".into(),
@ -134,28 +174,32 @@ impl Display for Token {
Token::Keyword(Keyword::Void) => "void".into(), Token::Keyword(Keyword::Void) => "void".into(),
}; };
write!(f, "{}", op) write!(f, "{}", stok)
} }
} }
impl TokenStream { impl TokenStream {
/// Create a new TokenStream from the given token buffer
pub fn new(tokens: Vec<Token>) -> Self { pub fn new(tokens: Vec<Token>) -> Self {
Self { tokens, idx: 0 } Self { tokens, idx: 0 }
} }
pub fn as_vec(&self) -> &Vec<Token> { /// Get the underlying token buffer as reference
pub fn as_vec(&self) -> &[Token] {
&self.tokens &self.tokens
} }
/// Get the current token as reference. This does not advance to the next token
pub fn curr(&self) -> Option<&Token> { pub fn curr(&self) -> Option<&Token> {
self.tokens.get(self.idx) self.tokens.get(self.idx)
} }
/// Get the next token as reference. This does not advance to the next token
pub fn peek(&self) -> Option<&Token> { pub fn peek(&self) -> Option<&Token> {
self.tokens.get(self.idx + 1) self.tokens.get(self.idx + 1)
} }
/// Advance to the next token. Sets curr to next and returns the old curr. /// Advance to the next token. Sets curr to next and returns the old curr
pub fn advance(&mut self) -> Option<&Token> { pub fn advance(&mut self) -> Option<&Token> {
self.idx += 1; self.idx += 1;
self.tokens.get(self.idx - 1) self.tokens.get(self.idx - 1)
@ -163,7 +207,7 @@ impl TokenStream {
} }
impl Display for TokenStream { impl Display for TokenStream {
/// Print the TokenStream with autofomatting /// Print the TokenStream with autoformatting
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut indent = 0_usize; let mut indent = 0_usize;
let mut fresh_line = true; let mut fresh_line = true;