Refactor, Comments, Bugfix for lexer
- Small refactoring in the lexer
- Added some more comments to the lexer
- Fixed an endless loop when encountering a comment in the last line
parent 75e326e343
commit 88c5be6439
77 src/lexer.rs
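The endless-loop fix shows up in the comment-skipping arm of the diff below: the old `while self.next() != '\n' {}` never terminates when a `//` comment is the last thing in the file, because the lexer's `next()` keeps returning the `'\0'` EOF sentinel instead of a linefeed. The new condition also stops on `'\0'`. A minimal standalone sketch of that behaviour (the `next_char` helper is a hypothetical stand-in for the lexer's own `next()`, which per the diff yields `'\0'` once the input is exhausted):

    use std::{iter::Peekable, str::Chars};

    // Hypothetical stand-in for Lexer::next(): returns '\0' once the input is
    // exhausted, mirroring the EOF handling shown in the diff ('\0' => break).
    fn next_char(code: &mut Peekable<Chars<'_>>) -> char {
        code.next().unwrap_or('\0')
    }

    fn main() {
        // A line comment in the last line, with no trailing newline.
        let src = "// comment in last line";
        let mut chars = src.chars().peekable();

        // Old loop: `while next_char(&mut chars) != '\n' {}` spins forever here,
        // because '\n' never arrives and next_char() keeps returning '\0'.
        // The fixed loop from the diff also stops at the EOF sentinel:
        while !matches!(next_char(&mut chars), '\n' | '\0') {}

        println!("comment consumed; the lexer can now hit its '\\0' => break arm");
    }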
@@ -1,7 +1,7 @@
-use std::{iter::Peekable, str::Chars};
-use anyhow::Result;
-use thiserror::Error;
 use crate::token::Token;
+use anyhow::Result;
+use std::{iter::Peekable, str::Chars};
+use thiserror::Error;
 
 #[derive(Debug, Error)]
 pub enum LexErr {
@@ -15,10 +15,17 @@ pub enum LexErr {
     UnexpectedChar(char),
 
     #[error("Missing closing string quote '\"'")]
-    MissingClosingString
+    MissingClosingString,
+}
+
+/// Lex the provided code into a Token Buffer
+pub fn lex(code: &str) -> Result<Vec<Token>, LexErr> {
+    let mut lexer = Lexer::new(code);
+    lexer.lex()
 }
 
 struct Lexer<'a> {
+    /// The sourcecode text as an iterator over the chars
     code: Peekable<Chars<'a>>,
 }
 
@@ -33,12 +40,16 @@ impl<'a> Lexer<'a> {
 
         loop {
             match self.next() {
-                // Skip whitespace
-                ' ' | '\t' | '\n' | '\r' => (),
-
                 // Stop lexing at EOF
                 '\0' => break,
 
+                // Skip whitespace
+                ' ' | '\t' | '\n' | '\r' => (),
+
+                // Line comment. Consume every char until linefeed (next line)
+                '/' if matches!(self.peek(), '/') => while !matches!(self.next(), '\n' | '\0') {},
+
+                // Double character tokens
                 '>' if matches!(self.peek(), '>') => {
                     self.next();
                     tokens.push(Token::Shr);
@@ -76,9 +87,7 @@ impl<'a> Lexer<'a> {
                     tokens.push(Token::LOr);
                 }
 
-                // Line comment. Consume every char until linefeed (next line)
-                '/' if matches!(self.peek(), '/') => while self.next() != '\n' {},
-
+                // Single character tokens
                 ';' => tokens.push(Token::Semicolon),
                 '+' => tokens.push(Token::Add),
                 '-' => tokens.push(Token::Sub),
@@ -100,6 +109,7 @@ impl<'a> Lexer<'a> {
 
                 // Lex numbers
                 ch @ '0'..='9' => {
+                    // String representation of the integer value
                    let mut sval = String::from(ch);
 
                     // Do as long as a next char exists and it is a numeric char
@@ -118,7 +128,7 @@ impl<'a> Lexer<'a> {
                         }
                     }
 
-                    // TODO: We only added numeric chars to the string, but the conversion could still fail
+                    // Try to convert the string representation of the value to i64
                     let i64val = sval.parse().map_err(|_| LexErr::NumericParse(sval))?;
                     tokens.push(Token::I64(i64val));
                 }
@@ -129,26 +139,25 @@ impl<'a> Lexer<'a> {
 
                     let mut text = String::new();
 
-
+                    // Read all chars until encountering the closing "
                     loop {
                         match self.peek() {
                             '"' => break,
+                            // If the end of file is reached while still waiting for '"', error out
                             '\0' => Err(LexErr::MissingClosingString)?,
-                            _ => {
-                                match self.next() {
-                                    '\\' => {
-                                        match self.next() {
-                                            'n' => text.push('\n'),
-                                            'r' => text.push('\r'),
-                                            't' => text.push('\t'),
-                                            '\\' => text.push('\\'),
-                                            '"' => text.push('"'),
-                                            ch => Err(LexErr::InvalidStrEscape(ch))?,
-                                        }
-                                    }
-                                    ch => text.push(ch),
-                                }
-                            }
+                            _ => match self.next() {
+                                // Backshlash indicates an escaped character
+                                '\\' => match self.next() {
+                                    'n' => text.push('\n'),
+                                    'r' => text.push('\r'),
+                                    't' => text.push('\t'),
+                                    '\\' => text.push('\\'),
+                                    '"' => text.push('"'),
+                                    ch => Err(LexErr::InvalidStrEscape(ch))?,
+                                },
+                                // All other characters are simply appended to the string
+                                ch => text.push(ch),
+                            },
                         }
                     }
 
@@ -156,7 +165,6 @@ impl<'a> Lexer<'a> {
                     self.next();
 
                     tokens.push(Token::String(text))
-
                 }
 
                 // Lex characters as identifier
@@ -166,6 +174,7 @@ impl<'a> Lexer<'a> {
                     // Do as long as a next char exists and it is a valid char for an identifier
                     loop {
                         match self.peek() {
+                            // In the middle of an identifier numbers are also allowed
                             'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => {
                                 ident.push(self.next());
                             }
@@ -174,18 +183,20 @@ impl<'a> Lexer<'a> {
                         }
                     }
 
+                    // Check for pre-defined keywords
                     let token = match ident.as_str() {
                         "loop" => Token::Loop,
                         "print" => Token::Print,
                         "if" => Token::If,
                         "else" => Token::Else,
+
+                        // If it doesn't match a keyword, it is a normal identifier
                         _ => Token::Ident(ident),
                     };
 
                     tokens.push(token);
                 }
 
-                //TODO: Don't panic, keep calm
                 ch => Err(LexErr::UnexpectedChar(ch))?,
             }
         }
@@ -204,14 +215,6 @@ impl<'a> Lexer<'a> {
     }
 }
-
-/// Lex the provided code into a Token Buffer
-///
-/// TODO: Don't panic and implement error handling using Result
-pub fn lex(code: &str) -> Result<Vec<Token>, LexErr> {
-    let mut lexer = Lexer::new(code);
-    lexer.lex()
-}
 
 #[cfg(test)]
 mod tests {
     use super::{lex, Token};
@@ -280,6 +280,7 @@ mod tests {
             Token::I64(3),
             Token::Sub,
             Token::I64(4),
+            Token::Semicolon,
         ];
 
         let expected = Statement::Expr(Expression::BinOp(
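As a usage note, the relocated `lex` entry point can be exercised directly against the fixed comment handling. A hedged sketch of such a test (the module name and input are illustrative only; it assumes `Token` derives `PartialEq`/`Debug`, as the existing tests imply):

    #[cfg(test)]
    mod comment_eof_tests {
        use super::{lex, Token};

        #[test]
        fn comment_in_last_line_terminates() {
            // Before this commit, a trailing `//` comment without a newline made
            // the lexer loop forever; now lex() must return the tokens before it.
            let tokens = lex("1 + 2; // no newline after this comment").unwrap();
            assert_eq!(
                tokens,
                vec![Token::I64(1), Token::Add, Token::I64(2), Token::Semicolon]
            );
        }
    }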