Add comments & small additions
- Added more & better comments for `token.rs`, `lexer.rs`, `parser.rs` - Implemented HashTag Token for Lexer - Implemented additional safety checks for the Lexer::read functions
This commit is contained in:
parent
623fa71355
commit
cfc585426d
@ -8,17 +8,18 @@ pub enum BinOpType {
|
||||
|
||||
Mul,
|
||||
Div,
|
||||
Mod
|
||||
Mod,
|
||||
}
|
||||
|
||||
/// Unary Operator Types. For operations that have one operand
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum UnOpType {
|
||||
Neg
|
||||
Neg,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct FnCall {
|
||||
pub intrinsic: bool,
|
||||
pub fn_name: String,
|
||||
pub args: Vec<Expr>,
|
||||
}
|
||||
@ -39,3 +40,14 @@ pub enum Statement {
|
||||
LetBinding(String, Expr),
|
||||
Assignment(String, Expr),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Ast {
|
||||
pub(crate) prog: Vec<Statement>,
|
||||
}
|
||||
|
||||
impl Ast {
|
||||
pub fn new(prog: Vec<Statement>) -> Self {
|
||||
Self { prog }
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::{ast::{Statement, Expr}, token::Literal};
|
||||
use crate::{ast::{Statement, Expr, Ast}, token::Literal};
|
||||
|
||||
pub struct Interpreter {
|
||||
prog: Vec<Statement>,
|
||||
@ -13,9 +13,9 @@ pub struct Interpreter {
|
||||
}
|
||||
|
||||
impl Interpreter {
|
||||
pub fn new(prog: Vec<Statement>) -> Self {
|
||||
pub fn new(prog: Ast) -> Self {
|
||||
let variables = Default::default();
|
||||
Self { prog, variables, debug: true }
|
||||
Self { prog: prog.prog, variables, debug: true }
|
||||
}
|
||||
|
||||
pub fn run(&mut self) {
|
||||
|
||||
@ -4,8 +4,12 @@ use super::token::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum LexErrType {
|
||||
/// Lexer encountered an invalid character
|
||||
InvalidCharacter(char),
|
||||
/// While lexing a string an invalid escaped character was encountered. Backslash '\\' followed
|
||||
/// by the offending character
|
||||
InvalidEscapeChar(char),
|
||||
/// While lexing a string, the closing quote did not occur before file end
|
||||
MissingQuoteEnd,
|
||||
}
|
||||
|
||||
@ -19,11 +23,17 @@ type LexRes<T> = Result<T, LexErr>;
|
||||
|
||||
pub struct Lexer<'a> {
|
||||
// code: &'a str,
|
||||
|
||||
/// Peekable iterator over the sourcecode utf-8 characters together with the byte indices
|
||||
code_iter: Peekable<CharIndices<'a>>,
|
||||
|
||||
/// The char & byte index pair that is currently being evaluated. This character will not be
|
||||
/// present in the code_iter iterator since it has been removed already.
|
||||
curr_char: Option<(usize, char)>,
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Create a new Lexer from the given sourcecode string
|
||||
pub fn new(code: &'a str) -> Self {
|
||||
let mut code_iter = code.char_indices().peekable();
|
||||
let curr_char = code_iter.next();
|
||||
@ -34,26 +44,31 @@ impl<'a> Lexer<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex the sourcecode and produce a TokenStream containing the Tokens represented by the
|
||||
/// sourcecode. This can fail due to a few lexing errors like encountering unknown / unhandled
|
||||
/// chars, non terminated quotes and so on. Syntactic errors are not detected at this point.
|
||||
pub fn tokenize(&mut self) -> LexRes<TokenStream> {
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
loop {
|
||||
let (_idx, ch) = match self.curr_char {
|
||||
Some(it) => it,
|
||||
None => break,
|
||||
};
|
||||
// Iterate through the whole sourcecode until EOF is reached
|
||||
while let Some((_idx, ch)) = self.curr_char {
|
||||
|
||||
// Peek the next char & byte index for matching multi-char tokens
|
||||
let (_idx_nxt, ch_nxt) = self
|
||||
.peek()
|
||||
.map(|(a, b)| (Some(a), Some(b)))
|
||||
.unwrap_or_default();
|
||||
|
||||
// Match the current char to decide what Token is represented
|
||||
match ch {
|
||||
// Skip whitespace
|
||||
' ' | '\t' | '\n' | '\r' => (),
|
||||
|
||||
// Lex tokens with 2 char length
|
||||
// Lex tokens with 2 char length. This matches on the current char and also the next
|
||||
|
||||
// Double slash "//" starts a comment, so skip ahead to the next line
|
||||
'/' if matches!(ch_nxt, Some('/')) => self.advance_until_new_line(),
|
||||
|
||||
'=' if matches!(ch_nxt, Some('=')) => {
|
||||
self.advance();
|
||||
tokens.push(Token::Op(Op::Eq));
|
||||
@ -83,7 +98,7 @@ impl<'a> Lexer<'a> {
|
||||
tokens.push(Token::Op(Op::Or));
|
||||
}
|
||||
|
||||
// Lex tokens with 1 char length
|
||||
// Lex tokens with 1 char length. This just matches the current char
|
||||
'+' => tokens.push(Token::Op(Op::Add)),
|
||||
'-' => tokens.push(Token::Op(Op::Sub)),
|
||||
'*' => tokens.push(Token::Op(Op::Mul)),
|
||||
@ -104,14 +119,16 @@ impl<'a> Lexer<'a> {
|
||||
'.' => tokens.push(Token::Dot),
|
||||
'!' => tokens.push(Token::Op(Op::Not)),
|
||||
'^' => tokens.push(Token::Op(Op::Xor)),
|
||||
'#' => tokens.push(Token::Hashtag),
|
||||
|
||||
// Lex Strings
|
||||
// A quote represents a string start, so lex a string token here
|
||||
'"' => tokens.push(self.read_string()?),
|
||||
|
||||
// Lex numbers
|
||||
// A numeric digit represents a number start, so lex a number here
|
||||
'0'..='9' => tokens.push(self.read_num()?),
|
||||
|
||||
// Lex identifiers / keywords
|
||||
// An alphabetical char or underscore represents an identifier or keyword start, so
|
||||
// lex an identifier or keyword here
|
||||
'a'..='z' | 'A'..='Z' | '_' => tokens.push(self.read_ident_or_keyword()?),
|
||||
|
||||
// Anything else is an error
|
||||
@ -120,32 +137,47 @@ impl<'a> Lexer<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
// Consume the current token
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Ok(TokenStream::new(tokens))
|
||||
}
|
||||
|
||||
/// Get the next char & byte index. Don't consume the current char
|
||||
fn peek(&mut self) -> Option<&(usize, char)> {
|
||||
self.code_iter.peek()
|
||||
}
|
||||
|
||||
/// Consume the current char and fetch the next
|
||||
fn advance(&mut self) {
|
||||
self.curr_char = self.code_iter.next();
|
||||
}
|
||||
|
||||
/// Consume all characters until the next line. The last character before the next line is
|
||||
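/// still kept in curr_char to be consumed by the tokenize function.
///
/// NOTE(review): the loop below only stops on '\n'. If the sourcecode ends without a trailing
/// newline, curr_char becomes None and the loop appears to never terminate - confirm and handle
/// EOF explicitly.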
|
||||
fn advance_until_new_line(&mut self) {
|
||||
while !matches!(self.curr_char, Some((_, '\n'))) {
|
||||
self.advance();
|
||||
}
|
||||
if matches!(self.curr_char, Some((_, '\r'))) {
|
||||
if matches!(self.peek(), Some((_, '\r'))) {
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex a number consisting of one or more digits, starting at the current char. The last digit
|
||||
/// is kept in curr_char to be consumed by the tokenize function.
|
||||
fn read_num(&mut self) -> LexRes<Token> {
|
||||
match self.curr_char {
|
||||
Some((_, '0'..='9')) => (),
|
||||
_ => panic!("Lexer::read_num must not be called without having a digit in curr_char")
|
||||
}
|
||||
|
||||
// The function is only called if the curr_char is the beginning of a number, so curr_char
|
||||
// is guaranteed to be Some at this point
|
||||
let mut snum = format!("{}", self.curr_char.unwrap().1);
|
||||
|
||||
// Append the next chars to the string number until there are no digits anymore
|
||||
while let Some((_idx, ch)) = self.peek() {
|
||||
match ch {
|
||||
'0'..='9' => snum.push(*ch),
|
||||
@ -160,11 +192,24 @@ impl<'a> Lexer<'a> {
|
||||
Ok(Token::Literal(Literal::Int64(snum.parse().unwrap())))
|
||||
}
|
||||
|
||||
/// Lex a string consisting of any text enclosed by doublequotes with support for backslash
|
||||
/// escapes. The opening quote must be in curr_char already. The closing quote is kept in
|
||||
/// curr_char to be consumed by the tokenize function.
|
||||
fn read_string(&mut self) -> LexRes<Token> {
|
||||
match self.curr_char {
|
||||
Some((_, '"')) => (),
|
||||
_ => panic!("Lexer::read_string must not be called without having a '\"' in curr_char")
|
||||
}
|
||||
|
||||
let mut text = String::new();
|
||||
|
||||
// If true, the next character is an escaped char. This is set to true, if the last char
|
||||
// was a backslash
|
||||
let mut escape = false;
|
||||
|
||||
loop {
|
||||
// If the end of the sourcecode is reached while still lexing a string, there must have
|
||||
// been a quote missing
|
||||
let (_idx, ch) = match self.peek() {
|
||||
Some(it) => *it,
|
||||
None => return Err(LexErr::new(LexErrType::MissingQuoteEnd)),
|
||||
@ -173,7 +218,7 @@ impl<'a> Lexer<'a> {
|
||||
if escape {
|
||||
match ch {
|
||||
'"' | '\\' => text.push(ch),
|
||||
'\n' => text.push('\n'),
|
||||
'n' => text.push('\n'),
|
||||
'r' => text.push('\r'),
|
||||
't' => text.push('\t'),
|
||||
_ => return Err(LexErr::new(LexErrType::InvalidEscapeChar(ch))),
|
||||
@ -194,7 +239,17 @@ impl<'a> Lexer<'a> {
|
||||
Ok(Token::Literal(Literal::String(text)))
|
||||
}
|
||||
|
||||
/// Lex an identifier or keyword consisting of alphabetic characters, digits and underscores
|
||||
/// and starting with an alphabetic character or underscore. The first character is in curr_char
|
||||
/// and the last character is left in curr_char to be consumed by the tokenize function.
|
||||
/// If the identifier is a language keyword it is lexed as the appropriate token instead of a
|
||||
/// general identifier token.
|
||||
fn read_ident_or_keyword(&mut self) -> LexRes<Token> {
|
||||
match self.curr_char {
|
||||
Some((_, 'a'..='z' | 'A'..='Z' | '_')) => (),
|
||||
_ => panic!("Lexer::read_num must not be called without having a char or '_' in curr_char")
|
||||
}
|
||||
|
||||
let mut ident = format!("{}", self.curr_char.unwrap().1);
|
||||
|
||||
while let Some((_idx, ch)) = self.peek() {
|
||||
@ -205,6 +260,7 @@ impl<'a> Lexer<'a> {
|
||||
self.advance();
|
||||
}
|
||||
|
||||
// Check if the identifier is a language keyword
|
||||
let token = match ident.as_str() {
|
||||
"let" => Token::Keyword(Keyword::Let),
|
||||
"if" => Token::Keyword(Keyword::If),
|
||||
@ -249,7 +305,7 @@ mod test {
|
||||
([{)]}
|
||||
4564 "a string" false true
|
||||
an_5ident6
|
||||
; : , .
|
||||
; : , . #
|
||||
let if while loop else fn return void
|
||||
"#;
|
||||
|
||||
@ -294,6 +350,7 @@ mod test {
|
||||
Token::Colon,
|
||||
Token::Comma,
|
||||
Token::Dot,
|
||||
Token::Hashtag,
|
||||
|
||||
Token::Keyword(Keyword::Let),
|
||||
Token::Keyword(Keyword::If),
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
use crate::{
|
||||
ast::{BinOpType, Expr, FnCall, Statement, UnOpType},
|
||||
ast::{Ast, BinOpType, Expr, FnCall, Statement, UnOpType},
|
||||
token::{Group, Keyword, Op, Token, TokenStream},
|
||||
};
|
||||
|
||||
@ -8,6 +8,22 @@ pub struct ParseErr;
|
||||
|
||||
type PRes<T> = Result<T, ParseErr>;
|
||||
|
||||
/// The Parser contains a TokenStream to be parsed into an Ast (abstract syntax tree).
|
||||
///
|
||||
/// ## Grammar
|
||||
/// ### Statements
|
||||
/// `stmt_let = "let" ident "=" expr_add` \
|
||||
/// `stmt_assign = ident "=" expr_add` \
|
||||
/// `stmt = ( stmt_let | stmt_assign | expr_add ) ";"` \
|
||||
///
|
||||
/// ### Expressions
|
||||
/// `expr_literal = LITERAL` \
|
||||
/// `expr_fn_call = IDENT "(" expr_add? ( "," expr_add )* ")"` \
|
||||
/// `expr_variable = IDENT` \
|
||||
/// `expr_value = expr_literal | expr_fn_call | expr_variable` \
|
||||
/// `expr_term = "-" expr_term | "(" expr_add ")" | expr_value` \
|
||||
/// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*` \
|
||||
/// `expr_add = expr_mul (("+"|"-") expr_mul)*` \
|
||||
pub struct Parser {
|
||||
tokens: TokenStream,
|
||||
}
|
||||
@ -16,13 +32,12 @@ pub struct Parser {
|
||||
# GRAMMAR
|
||||
## expressions
|
||||
|
||||
ident = IDENT
|
||||
expr_literal = LITERAL
|
||||
expr_fn_call = ident "(" expr_add? ( "," expr_add )* ")"
|
||||
expr_varibale = ident
|
||||
expr_fn_call = IDENT "(" expr_add? ( "," expr_add )* ")"
|
||||
expr_variable = IDENT
|
||||
expr_value = expr_literal | expr_fn_call | expr_variable
|
||||
expr_term = "-" expr_term | "(" expr_add ")" | expr_literal
|
||||
expr_mul = expr_term (("*"|"/") expr_term)*
|
||||
expr_term = "-" expr_term | "(" expr_add ")" | expr_value
|
||||
expr_mul = expr_term (("*"|"/"|"%") expr_term)*
|
||||
expr_add = expr_mul (("+"|"-") expr_mul)*
|
||||
|
||||
## statements
|
||||
@ -31,24 +46,31 @@ stmt_assign = ident "=" expr_add
|
||||
stmt = ( stmt_let | stmt_assign | expr_add ) ";"
|
||||
*/
|
||||
|
||||
|
||||
impl Parser {
|
||||
/// Create a new parser from a TokenStream
|
||||
pub fn new(tokens: TokenStream) -> Self {
|
||||
Self { tokens }
|
||||
}
|
||||
|
||||
/// Get the current token without consuming it
|
||||
pub fn curr(&self) -> Option<&Token> {
|
||||
self.tokens.curr()
|
||||
}
|
||||
|
||||
/// Get the next token without consuming it
|
||||
pub fn peek(&self) -> Option<&Token> {
|
||||
self.tokens.peek()
|
||||
}
|
||||
|
||||
/// Advance to the next token, consuming it in the process
|
||||
pub fn advance(&mut self) -> Option<&Token> {
|
||||
self.tokens.advance()
|
||||
}
|
||||
|
||||
pub fn parse(&mut self) -> PRes<Vec<Statement>> {
|
||||
/// Parse a whole TokenStream into an Ast (abstract syntax tree). A program consists of a
|
||||
/// sequence of statements.
|
||||
pub fn parse(&mut self) -> PRes<Ast> {
|
||||
let mut prog = Vec::new();
|
||||
|
||||
while let Some(tok) = self.curr() {
|
||||
@ -62,18 +84,29 @@ impl Parser {
|
||||
}
|
||||
}
|
||||
|
||||
Ok(prog)
|
||||
Ok(Ast::new(prog))
|
||||
}
|
||||
|
||||
/// Parse a statement from the TokenStream. This consists of an expression, a let statement or
|
||||
/// an assignment.
|
||||
///
|
||||
/// ### Grammar
|
||||
/// `stmt = ( stmt_let | stmt_assign | expr_add ) ";"`
|
||||
pub fn parse_statement(&mut self) -> PRes<Statement> {
|
||||
// Check the current and next token to decide what kind of statement is being parsed
|
||||
let stmt = match self.curr() {
|
||||
// A let token -> Parse a let statement
|
||||
Some(Token::Keyword(Keyword::Let)) => self.parse_stmt_let(),
|
||||
// Ident and "=" -> An assignment without declaration (let)
|
||||
Some(Token::Ident(_)) if matches!(self.peek(), Some(Token::Op(Op::Assign))) => {
|
||||
self.parse_stmt_assign()
|
||||
}
|
||||
// Otherwise -> A simple expression
|
||||
_ => self.parse_expr_add().map(|expr| Statement::Expr(expr)),
|
||||
};
|
||||
|
||||
// Check that the statement is terminated with a semicolon.
|
||||
// TODO: This is not needed for block based statements like `while expr { ... }`
|
||||
if !matches!(self.advance(), Some(Token::Semicolon)) {
|
||||
panic!("Expected ';' while parsing statement");
|
||||
}
|
||||
@ -81,20 +114,29 @@ impl Parser {
|
||||
stmt
|
||||
}
|
||||
|
||||
/// Parse a let statement from the TokenStream. This consists of a let token, an identifier,
|
||||
/// an equal sign "=" and an expression.
|
||||
///
|
||||
/// ### Grammar
|
||||
/// `stmt_let = "let" ident "=" expr_add`
|
||||
pub fn parse_stmt_let(&mut self) -> PRes<Statement> {
|
||||
// Check if the let token is there
|
||||
if !matches!(self.advance(), Some(Token::Keyword(Keyword::Let))) {
|
||||
panic!("Unexpected token while parsing let statement. Expected 'let'");
|
||||
}
|
||||
|
||||
// Fetch the variable name
|
||||
let var_name = match self.advance() {
|
||||
Some(Token::Ident(ident)) => ident.clone(),
|
||||
_ => panic!("Unexpected token while parsing let statement. Expected ident"),
|
||||
};
|
||||
|
||||
// Check if the equal sign is present
|
||||
if !matches!(self.advance(), Some(Token::Op(Op::Assign))) {
|
||||
panic!("Unexpected token while parsing let statement. Expected '='");
|
||||
}
|
||||
|
||||
// Parse the right hand side of the let statement
|
||||
let rhs = self.parse_expr_add()?;
|
||||
|
||||
let let_binding = Statement::LetBinding(var_name, rhs);
|
||||
@ -102,16 +144,24 @@ impl Parser {
|
||||
Ok(let_binding)
|
||||
}
|
||||
|
||||
/// Parse an assignment statement from the TokenStream. This consists of an identifier, an
|
||||
/// equal sign "=" and an expression.
|
||||
///
|
||||
/// ### Grammar
|
||||
/// `stmt_assign = ident "=" expr_add`
|
||||
pub fn parse_stmt_assign(&mut self) -> PRes<Statement> {
|
||||
// Fetch the variable name
|
||||
let var_name = match self.advance() {
|
||||
Some(Token::Ident(ident)) => ident.clone(),
|
||||
_ => panic!("Unexpected token while parsing assignment statement. Expected ident"),
|
||||
};
|
||||
|
||||
// Check that the equal sign is present
|
||||
if !matches!(self.advance(), Some(Token::Op(Op::Assign))) {
|
||||
panic!("Unexpected token while parsing let assignment. Expected '='");
|
||||
}
|
||||
|
||||
// Parse the right hand side of the assignment
|
||||
let rhs = self.parse_expr_add()?;
|
||||
|
||||
let let_binding = Statement::Assignment(var_name, rhs);
|
||||
@ -119,14 +169,26 @@ impl Parser {
|
||||
Ok(let_binding)
|
||||
}
|
||||
|
||||
/// The main expression parsing function. This can be a multiplication expression and 0 or more
|
||||
/// further multiplication expressions separated by addition precedence operators (add '+',
|
||||
/// sub '-').
|
||||
///
|
||||
/// Add is the operator with the lowest precedence which is why this recursively handles all
|
||||
/// other kinds of expressions.
|
||||
///
|
||||
/// ### Grammar
|
||||
/// `expr_add = expr_mul (("+"|"-") expr_mul)*`
|
||||
pub fn parse_expr_add(&mut self) -> PRes<Expr> {
|
||||
let mut a = self.parse_expr_mul()?;
|
||||
// Parse the left hand side / the main expression if there is nothing on the right
|
||||
let mut lhs = self.parse_expr_mul()?;
|
||||
|
||||
// Parse 0 or more expressions to the right side of the add operators
|
||||
while matches!(self.curr(), Some(Token::Op(Op::Add | Op::Sub))) {
|
||||
// We successfully matched curr against Some already in the while condition, so unwrap is fine
|
||||
// We successfully matched curr against Some already in the while condition, so unwrap
|
||||
// is fine
|
||||
let tok_op = self.advance().unwrap().clone();
|
||||
|
||||
let b = self.parse_expr_mul()?;
|
||||
let rhs = self.parse_expr_mul()?;
|
||||
|
||||
let op_type = match tok_op {
|
||||
Token::Op(Op::Add) => BinOpType::Add,
|
||||
@ -134,15 +196,23 @@ impl Parser {
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
a = Expr::BinOp(op_type, a.into(), b.into());
|
||||
lhs = Expr::BinOp(op_type, lhs.into(), rhs.into());
|
||||
}
|
||||
|
||||
Ok(a)
|
||||
Ok(lhs)
|
||||
}
|
||||
|
||||
/// Parse a multiplication expression from the TokenStream. This can be a term and 0 or more
|
||||
/// further terms separated by multiplication precedence operators (multiply '*', divide '/',
|
||||
/// modulo '%')
|
||||
///
|
||||
/// ### Grammar
|
||||
/// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*`
|
||||
pub fn parse_expr_mul(&mut self) -> PRes<Expr> {
|
||||
let mut a = self.parse_expr_term()?;
|
||||
// Parse the left hand side / the main expression if there is nothing on the right
|
||||
let mut lhs = self.parse_expr_term()?;
|
||||
|
||||
// Parse 0 or more expressions to the right side of the mul operators
|
||||
while matches!(self.curr(), Some(Token::Op(Op::Mul | Op::Div | Op::Mod))) {
|
||||
// We successfully matched curr against Some already in the while condition, so unwrap is fine
|
||||
let tok_op = self.advance().unwrap().clone();
|
||||
@ -156,31 +226,52 @@ impl Parser {
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
a = Expr::BinOp(op_type, a.into(), b.into());
|
||||
lhs = Expr::BinOp(op_type, lhs.into(), b.into());
|
||||
}
|
||||
|
||||
Ok(a)
|
||||
Ok(lhs)
|
||||
}
|
||||
|
||||
/// Parse a term expression from the TokenStream. This can be the negation of a term, an add
|
||||
/// expression enclosed by parentheses or a value.
|
||||
///
|
||||
/// ### Grammar
|
||||
/// `expr_term = "-" expr_term | "(" expr_add ")" | expr_value`
|
||||
pub fn parse_expr_term(&mut self) -> PRes<Expr> {
|
||||
let term = match self.curr() {
|
||||
// Current token is an opening parentheses '(' -> Must be an enclosed expr_add
|
||||
Some(Token::Open(Group::Paren)) => {
|
||||
// Skip the '('
|
||||
self.advance();
|
||||
let a = self.parse_expr_add()?;
|
||||
|
||||
let expr = self.parse_expr_add()?;
|
||||
|
||||
// After the expression must be closing parentheses ')'
|
||||
if !matches!(self.advance(), Some(Token::Close(Group::Paren))) {
|
||||
panic!("Missing closing parentheses while parsing term");
|
||||
}
|
||||
a
|
||||
|
||||
expr
|
||||
}
|
||||
// Current token is a minus '-' -> Must be a negated expr_term
|
||||
Some(Token::Op(Op::Sub)) => {
|
||||
// Skip the '-'
|
||||
self.advance();
|
||||
|
||||
// Parse an expr_term in a Negation Node
|
||||
Expr::UnOp(UnOpType::Neg, self.parse_expr_term()?.into())
|
||||
}
|
||||
// Nothing special in the current -> Must be an expr_value
|
||||
_ => self.parse_expr_value()?,
|
||||
};
|
||||
Ok(term)
|
||||
}
|
||||
|
||||
/// Parse a value expression from the TokenStream. This can be a literal value, a function call
|
||||
/// or a variable.
|
||||
///
|
||||
/// ### Grammar
|
||||
/// `expr_value = expr_literal | expr_fn_call | expr_variable`
|
||||
pub fn parse_expr_value(&mut self) -> PRes<Expr> {
|
||||
match self.curr() {
|
||||
Some(Token::Literal(_)) => self.parse_expr_literal(),
|
||||
@ -192,37 +283,62 @@ impl Parser {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a function call from the TokenStream. This consists of an identifier and 0 or more
|
||||
/// add expressions enclosed by parentheses '(', ')' and separated by commas ',' .
|
||||
///
|
||||
/// ### Grammar
|
||||
/// `expr_fn_call = IDENT "(" expr_add? ( "," expr_add )* ")"`
|
||||
pub fn parse_expr_fn_call(&mut self) -> PRes<Expr> {
|
||||
// The first 2 checks are not really necessary for internal calls since parse_expr_value
|
||||
// verifies the tokens already
|
||||
|
||||
// Get the function name
|
||||
let fn_name = match self.advance() {
|
||||
Some(Token::Ident(ident)) => ident.clone(),
|
||||
_ => panic!("Unexpected token while parsing function call. Expected identifier"),
|
||||
};
|
||||
|
||||
// Check that there really is an opening parentheses
|
||||
if !matches!(self.advance(), Some(Token::Open(Group::Paren))) {
|
||||
panic!("Unexpected token while parsing function call. Expected '('");
|
||||
}
|
||||
|
||||
let mut args = Vec::new();
|
||||
|
||||
// If there is not a closing parentheses directly after the opening "()", parse at least
|
||||
// one add expression
|
||||
// TODO: This is *suboptimal* code
|
||||
if !matches!(self.curr(), Some(Token::Close(Group::Paren))) {
|
||||
args.push(self.parse_expr_add()?);
|
||||
|
||||
// As long as there are commas after the expressions, parse more expressions as
|
||||
// parameters
|
||||
while matches!(self.curr(), Some(Token::Comma)) {
|
||||
self.advance();
|
||||
args.push(self.parse_expr_add()?);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if there really is a closing parentheses
|
||||
if !matches!(self.advance(), Some(Token::Close(Group::Paren))) {
|
||||
panic!("Unexpected token while parsing function call. Expected '('");
|
||||
}
|
||||
|
||||
Ok(Expr::FnCall(FnCall { fn_name, args }))
|
||||
// By default don't parse as an intrinsic function
|
||||
let intrinsic = false;
|
||||
|
||||
Ok(Expr::FnCall(FnCall {
|
||||
intrinsic,
|
||||
fn_name,
|
||||
args,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Parse a variable name value. This consists of an identifier without parentheses afterwards.
|
||||
/// The identifier represents the variable name.
|
||||
///
|
||||
/// ### Grammar
|
||||
/// `expr_variable = IDENT`
|
||||
pub fn parse_expr_varibale(&mut self) -> PRes<Expr> {
|
||||
match self.advance() {
|
||||
Some(Token::Ident(ident)) => Ok(Expr::Variable(ident.clone())),
|
||||
@ -230,6 +346,10 @@ impl Parser {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a literal value. This consists of a literal token.
|
||||
///
|
||||
/// ### Grammar
|
||||
/// `expr_literal = LITERAL`
|
||||
pub fn parse_expr_literal(&mut self) -> PRes<Expr> {
|
||||
match self.advance() {
|
||||
Some(Token::Literal(lit)) => Ok(Expr::Literal(lit.clone())),
|
||||
@ -308,6 +428,7 @@ mod tests {
|
||||
Box::new(Expr::UnOp(
|
||||
UnOpType::Neg,
|
||||
Expr::FnCall(FnCall {
|
||||
intrinsic: false,
|
||||
fn_name,
|
||||
args: vec![Expr::Literal(Literal::Int64(9))],
|
||||
})
|
||||
|
||||
@ -4,41 +4,66 @@ use std::{fmt::Display, borrow::Cow};
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum Op {
|
||||
// Addition
|
||||
|
||||
/// Add "+"
|
||||
Add,
|
||||
/// Subtract "-"
|
||||
Sub,
|
||||
|
||||
// Multiplications
|
||||
|
||||
/// Multiply "*"
|
||||
Mul,
|
||||
/// Divide "/"
|
||||
Div,
|
||||
/// Modulo "%"
|
||||
Mod,
|
||||
|
||||
// Assignment
|
||||
/// Assignment "="
|
||||
Assign,
|
||||
|
||||
// Equality
|
||||
|
||||
/// Equal "=="
|
||||
Eq,
|
||||
/// Not equal "!="
|
||||
Neq,
|
||||
/// Greater than ">"
|
||||
Gt,
|
||||
/// Less than "<"
|
||||
Lt,
|
||||
/// Greater or equal ">="
|
||||
Ge,
|
||||
/// Less or equal "<="
|
||||
Le,
|
||||
|
||||
// Bool
|
||||
// Boolean
|
||||
|
||||
/// And "&&"
|
||||
And,
|
||||
/// Or "||"
|
||||
Or,
|
||||
/// Not "!"
|
||||
Not,
|
||||
/// Xor "^"
|
||||
Xor,
|
||||
|
||||
/// Arrow "->"
|
||||
Arrow,
|
||||
}
|
||||
|
||||
/// Different types of parentheses
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum Group {
|
||||
/// Parentheses "(" | ")"
|
||||
Paren,
|
||||
/// Brackets "[" | "]"
|
||||
Bracket,
|
||||
/// Braces "{" | "}"
|
||||
Braces,
|
||||
}
|
||||
|
||||
/// Literal values for the different datatypes
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum Literal {
|
||||
Boolean(bool),
|
||||
@ -46,6 +71,7 @@ pub enum Literal {
|
||||
String(String),
|
||||
}
|
||||
|
||||
/// Language Keywords
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum Keyword {
|
||||
Let,
|
||||
@ -58,23 +84,35 @@ pub enum Keyword {
|
||||
Void,
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum Token {
|
||||
/// Literal values
|
||||
Literal(Literal),
|
||||
/// Operators
|
||||
Op(Op),
|
||||
/// Opening parentheses
|
||||
Open(Group),
|
||||
/// Closing parentheses
|
||||
Close(Group),
|
||||
|
||||
/// Identifier
|
||||
Ident(String),
|
||||
|
||||
/// Language keywords
|
||||
Keyword(Keyword),
|
||||
|
||||
/// Semicolon ";"
|
||||
Semicolon,
|
||||
/// Colon ":"
|
||||
Colon,
|
||||
/// Comma ","
|
||||
Comma,
|
||||
/// Dot "."
|
||||
Dot,
|
||||
/// Hashtag "#"
|
||||
Hashtag,
|
||||
}
|
||||
|
||||
/// A token buffer with an index for iterating over the tokens
|
||||
pub struct TokenStream {
|
||||
tokens: Vec<Token>,
|
||||
idx: usize,
|
||||
@ -83,7 +121,8 @@ pub struct TokenStream {
|
||||
impl Display for Token {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
|
||||
let op: Cow<'static, str> = match self {
|
||||
// String representation of the Token
|
||||
let stok: Cow<'static, str> = match self {
|
||||
Token::Op(Op::Add) => "+".into(),
|
||||
Token::Op(Op::Sub) => "-".into(),
|
||||
|
||||
@ -123,6 +162,7 @@ impl Display for Token {
|
||||
Token::Colon => ":".into(),
|
||||
Token::Comma => ",".into(),
|
||||
Token::Dot => ".".into(),
|
||||
Token::Hashtag => "#".into(),
|
||||
|
||||
Token::Keyword(Keyword::Let) => "let".into(),
|
||||
Token::Keyword(Keyword::If) => "if".into(),
|
||||
@ -134,28 +174,32 @@ impl Display for Token {
|
||||
Token::Keyword(Keyword::Void) => "void".into(),
|
||||
};
|
||||
|
||||
write!(f, "{}", op)
|
||||
write!(f, "{}", stok)
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenStream {
|
||||
/// Create a new TokenStream from the given token buffer
|
||||
pub fn new(tokens: Vec<Token>) -> Self {
|
||||
Self { tokens, idx: 0 }
|
||||
}
|
||||
|
||||
pub fn as_vec(&self) -> &Vec<Token> {
|
||||
/// Get the underlying token buffer as reference
|
||||
pub fn as_vec(&self) -> &[Token] {
|
||||
&self.tokens
|
||||
}
|
||||
|
||||
/// Get the current token as reference. This does not advance to the next token
|
||||
pub fn curr(&self) -> Option<&Token> {
|
||||
self.tokens.get(self.idx)
|
||||
}
|
||||
|
||||
/// Get the next token as reference. This does not advance to the next token
|
||||
pub fn peek(&self) -> Option<&Token> {
|
||||
self.tokens.get(self.idx + 1)
|
||||
}
|
||||
|
||||
/// Advance to the next token. Sets curr to next and returns the old curr.
|
||||
/// Advance to the next token. Sets curr to next and returns the old curr
|
||||
pub fn advance(&mut self) -> Option<&Token> {
|
||||
self.idx += 1;
|
||||
self.tokens.get(self.idx - 1)
|
||||
@ -163,7 +207,7 @@ impl TokenStream {
|
||||
}
|
||||
|
||||
impl Display for TokenStream {
|
||||
/// Print the TokenStream with autofomatting
|
||||
/// Print the TokenStream with autoformatting
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let mut indent = 0_usize;
|
||||
let mut fresh_line = true;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user