Small refactor for lexer
This commit is contained in:
parent
c2b9ee71b8
commit
f6152670aa
184
src/lexer.rs
184
src/lexer.rs
@ -107,95 +107,16 @@ impl<'a> Lexer<'a> {
|
|||||||
'}' => tokens.push(Token::RBraces),
|
'}' => tokens.push(Token::RBraces),
|
||||||
'!' => tokens.push(Token::LNot),
|
'!' => tokens.push(Token::LNot),
|
||||||
|
|
||||||
// Lex numbers
|
// Special tokens with variable length
|
||||||
ch @ '0'..='9' => {
|
|
||||||
// String representation of the integer value
|
|
||||||
let mut sval = String::from(ch);
|
|
||||||
|
|
||||||
// Do as long as a next char exists and it is a numeric char
|
// Lex multiple characters together as numbers
|
||||||
loop {
|
ch @ '0'..='9' => tokens.push(self.lex_number(ch)?),
|
||||||
// The next char is verified to be Some, so unwrap is safe
|
|
||||||
match self.peek() {
|
|
||||||
// Underscore is a separator, so remove it but don't add to number
|
|
||||||
'_' => {
|
|
||||||
self.next();
|
|
||||||
}
|
|
||||||
'0'..='9' => {
|
|
||||||
sval.push(self.next());
|
|
||||||
}
|
|
||||||
// Next char is not a number, so stop and finish the number token
|
|
||||||
_ => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to convert the string representation of the value to i64
|
// Lex multiple characters together as a string
|
||||||
let i64val = sval.parse().map_err(|_| LexErr::NumericParse(sval))?;
|
'"' => tokens.push(self.lex_str()?),
|
||||||
tokens.push(Token::I64(i64val));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Lex a string
|
// Lex multiple characters together as identifier
|
||||||
'"' => {
|
ch @ ('a'..='z' | 'A'..='Z' | '_') => tokens.push(self.lex_identifier(ch)?),
|
||||||
// Opening " was consumed in match
|
|
||||||
|
|
||||||
let mut text = String::new();
|
|
||||||
|
|
||||||
// Read all chars until encountering the closing "
|
|
||||||
loop {
|
|
||||||
match self.peek() {
|
|
||||||
'"' => break,
|
|
||||||
// If the end of file is reached while still waiting for '"', error out
|
|
||||||
'\0' => Err(LexErr::MissingClosingString)?,
|
|
||||||
_ => match self.next() {
|
|
||||||
// Backshlash indicates an escaped character
|
|
||||||
'\\' => match self.next() {
|
|
||||||
'n' => text.push('\n'),
|
|
||||||
'r' => text.push('\r'),
|
|
||||||
't' => text.push('\t'),
|
|
||||||
'\\' => text.push('\\'),
|
|
||||||
'"' => text.push('"'),
|
|
||||||
ch => Err(LexErr::InvalidStrEscape(ch))?,
|
|
||||||
},
|
|
||||||
// All other characters are simply appended to the string
|
|
||||||
ch => text.push(ch),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Consume closing "
|
|
||||||
self.next();
|
|
||||||
|
|
||||||
tokens.push(Token::String(text))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Lex characters as identifier
|
|
||||||
ch @ ('a'..='z' | 'A'..='Z' | '_') => {
|
|
||||||
let mut ident = String::from(ch);
|
|
||||||
|
|
||||||
// Do as long as a next char exists and it is a valid char for an identifier
|
|
||||||
loop {
|
|
||||||
match self.peek() {
|
|
||||||
// In the middle of an identifier numbers are also allowed
|
|
||||||
'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => {
|
|
||||||
ident.push(self.next());
|
|
||||||
}
|
|
||||||
// Next char is not valid, so stop and finish the ident token
|
|
||||||
_ => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for pre-defined keywords
|
|
||||||
let token = match ident.as_str() {
|
|
||||||
"loop" => Token::Loop,
|
|
||||||
"print" => Token::Print,
|
|
||||||
"if" => Token::If,
|
|
||||||
"else" => Token::Else,
|
|
||||||
|
|
||||||
// If it doesn't match a keyword, it is a normal identifier
|
|
||||||
_ => Token::Ident(ident),
|
|
||||||
};
|
|
||||||
|
|
||||||
tokens.push(token);
|
|
||||||
}
|
|
||||||
|
|
||||||
ch => Err(LexErr::UnexpectedChar(ch))?,
|
ch => Err(LexErr::UnexpectedChar(ch))?,
|
||||||
}
|
}
|
||||||
@ -204,6 +125,97 @@ impl<'a> Lexer<'a> {
|
|||||||
Ok(tokens)
|
Ok(tokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Lex multiple characters as a number until encountering a non numeric digit. This includes
|
||||||
|
/// the first character
|
||||||
|
fn lex_number(&mut self, first_char: char) -> Result<Token, LexErr> {
|
||||||
|
// String representation of the integer value
|
||||||
|
let mut sval = String::from(first_char);
|
||||||
|
|
||||||
|
// Do as long as a next char exists and it is a numeric char
|
||||||
|
loop {
|
||||||
|
// The next char is verified to be Some, so unwrap is safe
|
||||||
|
match self.peek() {
|
||||||
|
// Underscore is a separator, so remove it but don't add to number
|
||||||
|
'_' => {
|
||||||
|
self.next();
|
||||||
|
}
|
||||||
|
'0'..='9' => {
|
||||||
|
sval.push(self.next());
|
||||||
|
}
|
||||||
|
// Next char is not a number, so stop and finish the number token
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to convert the string representation of the value to i64
|
||||||
|
let i64val = sval.parse().map_err(|_| LexErr::NumericParse(sval))?;
|
||||||
|
Ok(Token::I64(i64val))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex characters as a string until encountering an unescaped closing double quote char '"'
|
||||||
|
fn lex_str(&mut self) -> Result<Token, LexErr> {
|
||||||
|
// Opening " was consumed in match
|
||||||
|
|
||||||
|
let mut text = String::new();
|
||||||
|
|
||||||
|
// Read all chars until encountering the closing "
|
||||||
|
loop {
|
||||||
|
match self.peek() {
|
||||||
|
'"' => break,
|
||||||
|
// If the end of file is reached while still waiting for '"', error out
|
||||||
|
'\0' => Err(LexErr::MissingClosingString)?,
|
||||||
|
_ => match self.next() {
|
||||||
|
// Backshlash indicates an escaped character
|
||||||
|
'\\' => match self.next() {
|
||||||
|
'n' => text.push('\n'),
|
||||||
|
'r' => text.push('\r'),
|
||||||
|
't' => text.push('\t'),
|
||||||
|
'\\' => text.push('\\'),
|
||||||
|
'"' => text.push('"'),
|
||||||
|
ch => Err(LexErr::InvalidStrEscape(ch))?,
|
||||||
|
},
|
||||||
|
// All other characters are simply appended to the string
|
||||||
|
ch => text.push(ch),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consume closing "
|
||||||
|
self.next();
|
||||||
|
|
||||||
|
Ok(Token::String(text))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex characters from the text as an identifier. This includes the first character passed in
|
||||||
|
fn lex_identifier(&mut self, first_char: char) -> Result<Token, LexErr> {
|
||||||
|
let mut ident = String::from(first_char);
|
||||||
|
|
||||||
|
// Do as long as a next char exists and it is a valid char for an identifier
|
||||||
|
loop {
|
||||||
|
match self.peek() {
|
||||||
|
// In the middle of an identifier numbers are also allowed
|
||||||
|
'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => {
|
||||||
|
ident.push(self.next());
|
||||||
|
}
|
||||||
|
// Next char is not valid, so stop and finish the ident token
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for pre-defined keywords
|
||||||
|
let token = match ident.as_str() {
|
||||||
|
"loop" => Token::Loop,
|
||||||
|
"print" => Token::Print,
|
||||||
|
"if" => Token::If,
|
||||||
|
"else" => Token::Else,
|
||||||
|
|
||||||
|
// If it doesn't match a keyword, it is a normal identifier
|
||||||
|
_ => Token::Ident(ident),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(token)
|
||||||
|
}
|
||||||
|
|
||||||
/// Advance to next character and return the removed char
|
/// Advance to next character and return the removed char
|
||||||
fn next(&mut self) -> char {
|
fn next(&mut self) -> char {
|
||||||
self.code.next().unwrap_or('\0')
|
self.code.next().unwrap_or('\0')
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user