From cfc585426da8be49248eed8b90826b534f45d2cb Mon Sep 17 00:00:00 2001 From: Daniel M Date: Tue, 28 Dec 2021 20:43:24 +0100 Subject: [PATCH] Add comments & small additions - Added more & better comments for `token.rs`, `lexer.rs`, `parser.rs` - Implemented HashTag Token for Lexer - Implemented additional safety checks for the Lexer::read functions --- plang2_lib/src/ast.rs | 16 +++- plang2_lib/src/interpreter.rs | 6 +- plang2_lib/src/lexer.rs | 83 +++++++++++++++--- plang2_lib/src/parser.rs | 159 ++++++++++++++++++++++++++++++---- plang2_lib/src/token.rs | 62 +++++++++++-- 5 files changed, 280 insertions(+), 46 deletions(-) diff --git a/plang2_lib/src/ast.rs b/plang2_lib/src/ast.rs index ba5462e..56a86dc 100644 --- a/plang2_lib/src/ast.rs +++ b/plang2_lib/src/ast.rs @@ -8,17 +8,18 @@ pub enum BinOpType { Mul, Div, - Mod + Mod, } /// Unary Operator Types. For operations that have one operand #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum UnOpType { - Neg + Neg, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct FnCall { + pub intrinsic: bool, pub fn_name: String, pub args: Vec, } @@ -39,3 +40,14 @@ pub enum Statement { LetBinding(String, Expr), Assignment(String, Expr), } + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Ast { + pub(crate) prog: Vec, +} + +impl Ast { + pub fn new(prog: Vec) -> Self { + Self { prog } + } +} diff --git a/plang2_lib/src/interpreter.rs b/plang2_lib/src/interpreter.rs index c4a0870..6580674 100644 --- a/plang2_lib/src/interpreter.rs +++ b/plang2_lib/src/interpreter.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use crate::{ast::{Statement, Expr}, token::Literal}; +use crate::{ast::{Statement, Expr, Ast}, token::Literal}; pub struct Interpreter { prog: Vec, @@ -13,9 +13,9 @@ pub struct Interpreter { } impl Interpreter { - pub fn new(prog: Vec) -> Self { + pub fn new(prog: Ast) -> Self { let variables = Default::default(); - Self { prog, variables, debug: true } + Self { prog: prog.prog, variables, debug: true } 
} pub fn run(&mut self) { diff --git a/plang2_lib/src/lexer.rs b/plang2_lib/src/lexer.rs index b852d2c..04ff857 100644 --- a/plang2_lib/src/lexer.rs +++ b/plang2_lib/src/lexer.rs @@ -4,8 +4,12 @@ use super::token::*; #[derive(Debug)] pub enum LexErrType { + /// Lexer encountered an invalid character InvalidCharacter(char), + /// While lexing a string an invalid escaped character was encountered. Backslash '\\' followed + /// by the offending character InvalidEscapeChar(char), + /// While lexing a string, the closing quote did not occur before file end MissingQuoteEnd, } @@ -19,11 +23,17 @@ type LexRes = Result; pub struct Lexer<'a> { // code: &'a str, + + /// Peekable iterator over the sourcecode utf-8 characters together with the byte indices code_iter: Peekable>, + + /// The char & byte index pair that is currently being evaluated. This character will not be + /// present in the code_iter iterator since it has been removed already. curr_char: Option<(usize, char)>, } impl<'a> Lexer<'a> { + /// Create a new Lexer from the given sourcecode string pub fn new(code: &'a str) -> Self { let mut code_iter = code.char_indices().peekable(); let curr_char = code_iter.next(); @@ -34,26 +44,31 @@ impl<'a> Lexer<'a> { } } + /// Lex the sourcecode and produce a TokenStream containing the Tokens represented by the + /// sourcecode. This can fail due to a few lexing errors like encountering unknown / unhandled + /// chars, non terminated quotes and so on. Syntactic errors are not detected at this point.
pub fn tokenize(&mut self) -> LexRes { let mut tokens = Vec::new(); - loop { - let (_idx, ch) = match self.curr_char { - Some(it) => it, - None => break, - }; + // Iterate through the whole sourcecode until EOF is reached + while let Some((_idx, ch)) = self.curr_char { + // Peek the next char & byte index for matching multi-char tokens let (_idx_nxt, ch_nxt) = self .peek() .map(|(a, b)| (Some(a), Some(b))) .unwrap_or_default(); + // Match the current char to decide what Token is represented match ch { // Skip whitespace ' ' | '\t' | '\n' | '\r' => (), - // Lex tokens with 2 char length + // Lex tokens with 2 char length. This matches on the current char and also the next + + // Double slash '/' is a comment, so skip ahead to the next line '/' if matches!(ch_nxt, Some('/')) => self.advance_until_new_line(), + '=' if matches!(ch_nxt, Some('=')) => { self.advance(); tokens.push(Token::Op(Op::Eq)); @@ -83,7 +98,7 @@ impl<'a> Lexer<'a> { tokens.push(Token::Op(Op::Or)); } - // Lex tokens with 1 char length + // Lex tokens with 1 char length. This just matches the current char '+' => tokens.push(Token::Op(Op::Add)), '-' => tokens.push(Token::Op(Op::Sub)), '*' => tokens.push(Token::Op(Op::Mul)), @@ -104,14 +119,16 @@ impl<'a> Lexer<'a> { '.' => tokens.push(Token::Dot), '!' 
=> tokens.push(Token::Op(Op::Not)), '^' => tokens.push(Token::Op(Op::Xor)), + '#' => tokens.push(Token::Hashtag), - // Lex Strings + // A quote represents a string start, so lex a string token here '"' => tokens.push(self.read_string()?), - // Lex numbers + // A numeric digit represents a number start, so lex a number here '0'..='9' => tokens.push(self.read_num()?), - // Lex identifiers / keywords + // An alphabetical char or underscore represents an identifier or keyword start, so + // lex an identifier or keyword here 'a'..='z' | 'A'..='Z' | '_' => tokens.push(self.read_ident_or_keyword()?), // Anything else is an error @@ -120,32 +137,47 @@ impl<'a> Lexer<'a> { } } + // Consume the current token self.advance(); } Ok(TokenStream::new(tokens)) } + /// Get the next char & byte index. Don't consume the current char fn peek(&mut self) -> Option<&(usize, char)> { self.code_iter.peek() } + /// Consume the current char and fetch the next fn advance(&mut self) { self.curr_char = self.code_iter.next(); } + /// Consume all characters until the next line. The last character before the next line is + /// still kept in curr_char to be consumed by the tokenize function. fn advance_until_new_line(&mut self) { while !matches!(self.curr_char, Some((_, '\n'))) { self.advance(); } - if matches!(self.curr_char, Some((_, '\r'))) { + if matches!(self.peek(), Some((_, '\r'))) { self.advance(); } } + /// Lex a number consisting of one or more digits, starting at the current char. The last digit + /// is kept in curr_char to be consumed by the tokenize function. 
fn read_num(&mut self) -> LexRes { + match self.curr_char { + Some((_, '0'..='9')) => (), + _ => panic!("Lexer::read_num must not be called without having a digit in curr_char") + } + + // The function is only called if the curr_char is the beginning of a number, so curr_char + // is guaranteed to be Some at this point let mut snum = format!("{}", self.curr_char.unwrap().1); + // Append the next chars to the string number until there are no digits anymore while let Some((_idx, ch)) = self.peek() { match ch { '0'..='9' => snum.push(*ch), @@ -160,11 +192,24 @@ impl<'a> Lexer<'a> { Ok(Token::Literal(Literal::Int64(snum.parse().unwrap()))) } + /// Lex a string consisting of any text enclosed by doublequotes with support for backslash + /// escapes. The opening quote must be in curr_char already. The closing quote is kept in + /// curr_char to be consumed by the tokenize function. fn read_string(&mut self) -> LexRes { + match self.curr_char { + Some((_, '"')) => (), + _ => panic!("Lexer::read_string must not be called without having a '\"' in curr_char") + } + let mut text = String::new(); + // If true, the next character is an escaped char. This is set to true, if the last char + // was a backslash let mut escape = false; + loop { + // If the end of the sourcecode is reached while still lexing a string, there must have + // been a quote missing let (_idx, ch) = match self.peek() { Some(it) => *it, None => return Err(LexErr::new(LexErrType::MissingQuoteEnd)), @@ -173,7 +218,7 @@ impl<'a> Lexer<'a> { if escape { match ch { '"' | '\\' => text.push(ch), - '\n' => text.push('\n'), + 'n' => text.push('\n'), 'r' => text.push('\r'), 't' => text.push('\t'), _ => return Err(LexErr::new(LexErrType::InvalidEscapeChar(ch))), @@ -194,7 +239,17 @@ impl<'a> Lexer<'a> { Ok(Token::Literal(Literal::String(text))) } + /// Lex an identifier or keyword consisting of alphabetic characters, digits and underscores + /// and starting with a alphabetic character or underscore. 
The first character is in curr_char + /// and the last character is left in curr_char to be consumed by the tokenize function. + /// If the identifier is a language keyword it is lexed as the appropriate token instead of a + /// general identifier token. fn read_ident_or_keyword(&mut self) -> LexRes { + match self.curr_char { + Some((_, 'a'..='z' | 'A'..='Z' | '_')) => (), + _ => panic!("Lexer::read_num must not be called without having a char or '_' in curr_char") + } + let mut ident = format!("{}", self.curr_char.unwrap().1); while let Some((_idx, ch)) = self.peek() { @@ -205,6 +260,7 @@ impl<'a> Lexer<'a> { self.advance(); } + // Check if the identifier is a language keyword let token = match ident.as_str() { "let" => Token::Keyword(Keyword::Let), "if" => Token::Keyword(Keyword::If), @@ -249,7 +305,7 @@ mod test { ([{)]} 4564 "a string" false true an_5ident6 - ; : , . + ; : , . # let if while loop else fn return void "#; @@ -294,6 +350,7 @@ mod test { Token::Colon, Token::Comma, Token::Dot, + Token::Hashtag, Token::Keyword(Keyword::Let), Token::Keyword(Keyword::If), diff --git a/plang2_lib/src/parser.rs b/plang2_lib/src/parser.rs index 3ef2735..3e2abfd 100644 --- a/plang2_lib/src/parser.rs +++ b/plang2_lib/src/parser.rs @@ -1,5 +1,5 @@ use crate::{ - ast::{BinOpType, Expr, FnCall, Statement, UnOpType}, + ast::{Ast, BinOpType, Expr, FnCall, Statement, UnOpType}, token::{Group, Keyword, Op, Token, TokenStream}, }; @@ -8,6 +8,22 @@ pub struct ParseErr; type PRes = Result; +/// The Parser contains a TokenStream to be parsed into an Ast (abstract syntax tree). +/// +/// ## Grammar +/// ### Statements +/// `stmt_let = "let" ident "=" expr_add` \ +/// `stmt_assign = ident "=" expr_add` \ +/// `stmt = ( stmt_let | stmt_assign | expr_add ) ";"` \ +/// +/// ### Expressions +/// `expr_literal = LITERAL` \ +/// `expr_fn_call = IDENT "(" expr_add?
( "," expr_add )* ")"` \ +/// `expr_varibale = IDENT` \ +/// `expr_value = expr_literal | expr_fn_call | expr_variable` \ +/// `expr_term = "-" expr_term | "(" expr_add ")" | expr_value` \ +/// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*` \ +/// `expr_add = expr_mul (("+"|"-") expr_mul)*` \ pub struct Parser { tokens: TokenStream, } @@ -16,13 +32,12 @@ pub struct Parser { # GRAMMAR ## expressions -ident = IDENT expr_literal = LITERAL -expr_fn_call = ident "(" expr_add? ( "," expr_add )* ")" -expr_varibale = ident +expr_fn_call = IDENT "(" expr_add? ( "," expr_add )* ")" +expr_varibale = IDENT expr_value = expr_literal | expr_fn_call | expr_variable -expr_term = "-" expr_term | "(" expr_add ")" | expr_literal -expr_mul = expr_term (("*"|"/") expr_term)* +expr_term = "-" expr_term | "(" expr_add ")" | expr_value +expr_mul = expr_term (("*"|"/"|"%") expr_term)* expr_add = expr_mul (("+"|"-") expr_mul)* ## statements @@ -31,24 +46,31 @@ stmt_assign = ident "=" expr_add stmt = ( stmt_let | stmt_assign | expr_add ) ";" */ + impl Parser { + /// Create a new parser from a TokenStream pub fn new(tokens: TokenStream) -> Self { Self { tokens } } + /// Get the current token without consuming it pub fn curr(&self) -> Option<&Token> { self.tokens.curr() } + /// Get the next token without consuming it pub fn peek(&self) -> Option<&Token> { self.tokens.peek() } + /// Advance to the next token, consuming it in the process pub fn advance(&mut self) -> Option<&Token> { self.tokens.advance() } - pub fn parse(&mut self) -> PRes> { + /// Parse a whole TokenStream into an Ast (abstract syntax tree). A program consists of a + /// sequence of statements. + pub fn parse(&mut self) -> PRes { let mut prog = Vec::new(); while let Some(tok) = self.curr() { @@ -62,18 +84,29 @@ impl Parser { } } - Ok(prog) + Ok(Ast::new(prog)) } + /// Parse a statement from the TokenStream. This consists of an expression, a let statement or + /// an assignment. 
+ /// + /// ### Grammar + /// `stmt = ( stmt_let | stmt_assign | expr_add ) ";"` pub fn parse_statement(&mut self) -> PRes { + // Check the current and next char to decide what kind of statement is being parsed let stmt = match self.curr() { + // A let token -> Parse a let statement Some(Token::Keyword(Keyword::Let)) => self.parse_stmt_let(), + // Ident and "=" -> An assignment without declaration (let) Some(Token::Ident(_)) if matches!(self.peek(), Some(Token::Op(Op::Assign))) => { self.parse_stmt_assign() } + // Otherwise -> A simple expression _ => self.parse_expr_add().map(|expr| Statement::Expr(expr)), }; + // Check that the statement is terminated with a semicolon. + // TODO: This is not needed for block based statements like `while expr { ... }` if !matches!(self.advance(), Some(Token::Semicolon)) { panic!("Expected ';' while parsing statement"); } @@ -81,20 +114,29 @@ impl Parser { stmt } + /// Parse a let statement from the TokenStream. This consists of a let token, an identifier, + /// an equal sign "=" and an expression. + /// + /// ### Grammar + /// `stmt_let = "let" ident "=" expr_add` pub fn parse_stmt_let(&mut self) -> PRes { + // Check if the let token is there if !matches!(self.advance(), Some(Token::Keyword(Keyword::Let))) { panic!("Unexpected token while parsing let statement. Expected 'let'"); } + // Fetch the variable name let var_name = match self.advance() { Some(Token::Ident(ident)) => ident.clone(), _ => panic!("Unexpected token while parsing let statement. Expected ident"), }; + // Check if the equal sign is present if !matches!(self.advance(), Some(Token::Op(Op::Assign))) { panic!("Unexpected token while parsing let statement. Expected '='"); } + // Parse the right hand side of the let statement let rhs = self.parse_expr_add()?; let let_binding = Statement::LetBinding(var_name, rhs); @@ -102,16 +144,24 @@ impl Parser { Ok(let_binding) } + /// Parse an assignment statement from the TokenStream. 
This consists of an identifier, an + /// equal sign "=" and an expression. + /// + /// ### Grammar + /// `stmt_assign = ident "=" expr_add` pub fn parse_stmt_assign(&mut self) -> PRes { + // Fetch the variable name let var_name = match self.advance() { Some(Token::Ident(ident)) => ident.clone(), _ => panic!("Unexpected token while parsing assignment statement. Expected ident"), }; + // Check that the equal sign is present if !matches!(self.advance(), Some(Token::Op(Op::Assign))) { panic!("Unexpected token while parsing let assignment. Expected '='"); } + // Parse the right hand side of the assignment let rhs = self.parse_expr_add()?; let let_binding = Statement::Assignment(var_name, rhs); @@ -119,14 +169,26 @@ impl Parser { Ok(let_binding) } + /// The main expression parsing function. This can be a multiplication expression and 0 or more + /// further multiplication expressions separated by addition precedence operators (add '+', + /// sub '-'). + /// + /// Add is the operator with the lowest precedence which is why this recursively handles all + /// other kinds of expressions.
+ /// + /// ### Grammar + /// `expr_add = expr_mul (("+"|"-") expr_mul)*` pub fn parse_expr_add(&mut self) -> PRes { - let mut a = self.parse_expr_mul()?; + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_mul()?; + // Parse 0 or more expressions to the right side of the add operators while matches!(self.curr(), Some(Token::Op(Op::Add | Op::Sub))) { - // We successfully matched curr against Some already in the while condition, so unwrap is fine + // We successfully matched curr against Some already in the while condition, so unwrap + // is fine let tok_op = self.advance().unwrap().clone(); - let b = self.parse_expr_mul()?; + let rhs = self.parse_expr_mul()?; let op_type = match tok_op { Token::Op(Op::Add) => BinOpType::Add, @@ -134,15 +196,23 @@ impl Parser { _ => unreachable!(), }; - a = Expr::BinOp(op_type, a.into(), b.into()); + lhs = Expr::BinOp(op_type, lhs.into(), rhs.into()); } - Ok(a) + Ok(lhs) } + /// Parse a multiplication expression from the TokenStream.
This can be a term and 0 or more + /// further terms separated by multiplication precedence operators (multiply '*', divide '/', + /// modulo '%') + /// + /// ### Grammar + /// `expr_mul = expr_term (("*"|"/"|"%") expr_term)*` pub fn parse_expr_mul(&mut self) -> PRes { - let mut a = self.parse_expr_term()?; + // Parse the left hand side / the main expression if there is nothing on the right + let mut lhs = self.parse_expr_term()?; + // Parse 0 or more expressions to the right side of the mul operators while matches!(self.curr(), Some(Token::Op(Op::Mul | Op::Div | Op::Mod))) { // We successfully matched curr against Some already in the while condition, so unwrap is fine let tok_op = self.advance().unwrap().clone(); @@ -156,31 +226,52 @@ impl Parser { _ => unreachable!(), }; - a = Expr::BinOp(op_type, a.into(), b.into()); + lhs = Expr::BinOp(op_type, lhs.into(), b.into()); } - Ok(a) + Ok(lhs) } + /// Parse a term expression from the TokenStream. This can be the negation of a term, an add + /// expression enclosed by parentheses or a value.
+ /// + /// ### Grammar + /// `expr_term = "-" expr_term | "(" expr_add ")" | expr_value` pub fn parse_expr_term(&mut self) -> PRes { let term = match self.curr() { + // Current token is an opening parentheses '(' -> Must be an enclosed expr_add Some(Token::Open(Group::Paren)) => { + // Skip the '(' self.advance(); - let a = self.parse_expr_add()?; + + let expr = self.parse_expr_add()?; + + // After the expression must be closing parentheses ')' if !matches!(self.advance(), Some(Token::Close(Group::Paren))) { panic!("Missing closing parentheses while parsing term"); } - a + + expr } + // Current token is a minus '-' -> Must be a negated expr_term Some(Token::Op(Op::Sub)) => { + // Skip the '-' self.advance(); + + // Parse an expr_term in a Negation Node Expr::UnOp(UnOpType::Neg, self.parse_expr_term()?.into()) } + // Nothing special in the current -> Must be an expr_value _ => self.parse_expr_value()?, }; Ok(term) } + /// Parse a value expression from the TokenStream. This can be a literal value, a function call + /// or a variable. + /// + /// ### Grammar + /// `expr_value = expr_literal | expr_fn_call | expr_variable` pub fn parse_expr_value(&mut self) -> PRes { match self.curr() { Some(Token::Literal(_)) => self.parse_expr_literal(), @@ -192,37 +283,62 @@ impl Parser { } } + /// Parse a function call from the TokenStream. This consists of an identifier and 0 or more + /// add expressions enclosed by parentheses '(', ')' and separated by commas ',' . + /// + /// ### Grammar + /// `expr_fn_call = IDENT "(" expr_add? ( "," expr_add )* ")"` pub fn parse_expr_fn_call(&mut self) -> PRes { // The first 2 checks are not really necessary for internal calls since parse_expr_value // verifies the tokens already + + // Get the function name let fn_name = match self.advance() { Some(Token::Ident(ident)) => ident.clone(), _ => panic!("Unexpected token while parsing function call.
Expected identifier"), }; + // Check that there really is an opening parentheses if !matches!(self.advance(), Some(Token::Open(Group::Paren))) { panic!("Unexpected token while parsing function call. Expected '('"); } let mut args = Vec::new(); + // If there is not a closing parentheses directly after the opening "()", parse at least + // one add expression // TODO: This is *suboptimal* code if !matches!(self.curr(), Some(Token::Close(Group::Paren))) { args.push(self.parse_expr_add()?); + // As long as there are commas after the expressions, parse more expressions as + // parameters while matches!(self.curr(), Some(Token::Comma)) { self.advance(); args.push(self.parse_expr_add()?); } } + // Check if there really is a closing parentheses if !matches!(self.advance(), Some(Token::Close(Group::Paren))) { panic!("Unexpected token while parsing function call. Expected '('"); } - Ok(Expr::FnCall(FnCall { fn_name, args })) + // By default don't parse as an intrinsic function + let intrinsic = false; + + Ok(Expr::FnCall(FnCall { + intrinsic, + fn_name, + args, + })) } + /// Parse a variable name value. This consists of an identifier without parentheses afterwards. + /// The identifier represents the variable name. + /// + /// ### Grammar + /// `expr_varibale = IDENT` pub fn parse_expr_varibale(&mut self) -> PRes { match self.advance() { Some(Token::Ident(ident)) => Ok(Expr::Variable(ident.clone())), @@ -230,6 +346,10 @@ impl Parser { } } + /// Parse a literal value. This consists of a literal token. 
+ /// + /// ### Grammar + /// `expr_literal = LITERAL` pub fn parse_expr_literal(&mut self) -> PRes { match self.advance() { Some(Token::Literal(lit)) => Ok(Expr::Literal(lit.clone())), @@ -308,6 +428,7 @@ mod tests { Box::new(Expr::UnOp( UnOpType::Neg, Expr::FnCall(FnCall { + intrinsic: false, fn_name, args: vec![Expr::Literal(Literal::Int64(9))], }) diff --git a/plang2_lib/src/token.rs b/plang2_lib/src/token.rs index 23fbac0..681bea8 100644 --- a/plang2_lib/src/token.rs +++ b/plang2_lib/src/token.rs @@ -4,41 +4,66 @@ use std::{fmt::Display, borrow::Cow}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Op { // Addition + + /// Add "+" Add, + /// Subtract "-" Sub, // Multiplications + + /// Multiply "*" Mul, + /// Divide "/" Div, + /// Modulo "%" Mod, - // Assignment + /// Assignment "=" Assign, // Equality + + /// Equal "==" Eq, + /// Not equal "!=" Neq, + /// Greater than ">" Gt, + /// Lesser than "<" Lt, + /// Greater or equal ">=" Ge, + /// Lesser or equal "<=" Le, - // Bool + // Boolean + + /// And "&&" And, + /// Or "||" Or, + /// Not "!" 
Not, + /// Xor "^" Xor, + /// Arrow "->" Arrow, } +/// Different types of parentheses #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Group { + /// Parentheses "(" | ")" Paren, + /// Brackets "[" | "]" Bracket, + /// Braces "{" | "}" Braces, } +/// Literal values for the different datatypes #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Literal { Boolean(bool), @@ -46,6 +71,7 @@ pub enum Literal { String(String), } +/// Language Keywords #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Keyword { Let, @@ -58,23 +84,35 @@ pub enum Keyword { Void, } + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Token { + /// Literal values Literal(Literal), + /// Operators Op(Op), + /// Opening parentheses Open(Group), + /// Closing parentheses Close(Group), - + /// Identifier Ident(String), - + /// Language keywords Keyword(Keyword), + /// Semicolon ";" Semicolon, + /// Colon ":" Colon, + /// Comma "," Comma, + /// Dot "." Dot, + /// Hashtag "#" + Hashtag, } +/// A token buffer with an index for iterating over the tokens pub struct TokenStream { tokens: Vec, idx: usize, @@ -83,7 +121,8 @@ pub struct TokenStream { impl Display for Token { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let op: Cow<'static, str> = match self { + // String representation of the Token + let stok: Cow<'static, str> = match self { Token::Op(Op::Add) => "+".into(), Token::Op(Op::Sub) => "-".into(), @@ -123,6 +162,7 @@ impl Display for Token { Token::Colon => ":".into(), Token::Comma => ",".into(), Token::Dot => ".".into(), + Token::Hashtag => "#".into(), Token::Keyword(Keyword::Let) => "let".into(), Token::Keyword(Keyword::If) => "if".into(), @@ -134,28 +174,32 @@ impl Display for Token { Token::Keyword(Keyword::Void) => "void".into(), }; - write!(f, "{}", op) + write!(f, "{}", stok) } } impl TokenStream { + /// Create a new TokenStream from the given token buffer pub fn new(tokens: Vec) -> Self { Self { tokens, idx: 0 } } - pub fn as_vec(&self) -> &Vec { + 
/// Get the underlying token buffer as reference + pub fn as_vec(&self) -> &[Token] { &self.tokens } + /// Get the current token as reference. This does not advance to the next token pub fn curr(&self) -> Option<&Token> { self.tokens.get(self.idx) } + /// Get the next token as reference. This does not advance to the next token pub fn peek(&self) -> Option<&Token> { self.tokens.get(self.idx + 1) } - /// Advance to the next token. Sets curr to next and returns the old curr. + /// Advance to the next token. Sets curr to next and returns the old curr pub fn advance(&mut self) -> Option<&Token> { self.idx += 1; self.tokens.get(self.idx - 1) @@ -163,7 +207,7 @@ impl TokenStream { } impl Display for TokenStream { - /// Print the TokenStream with autofomatting + /// Print the TokenStream with autoformatting fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut indent = 0_usize; let mut fresh_line = true;