254 lines
6.6 KiB
Rust
254 lines
6.6 KiB
Rust
use std::{iter::Peekable, str::Chars};
|
|
|
|
use crate::parser::BinOpType;
|
|
|
|
/// A single lexical token.
///
/// `Clone` is derived so that consumers can keep a copy of a token without
/// having to own the token buffer it came from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// Integer literal (64-bit)
    I64(i64),

    /// Left parenthesis ('(')
    LParen,

    /// Right parenthesis (')')
    RParen,

    /// Plus (+)
    Add,

    /// Minus (-)
    Sub,

    /// Asterisk (*)
    Mul,

    /// Slash (/)
    Div,

    /// Percent (%)
    Mod,

    /// Pipe (|)
    BOr,

    /// Ampersand (&)
    BAnd,

    /// Circumflex (^)
    BXor,

    /// Shift Left (<<)
    Shl,

    /// Shift Right (>>)
    Shr,

    /// Equal (==)
    Equ,

    /// Not equal (!=)
    Neq,

    /// Greater than (>)
    Gt,

    /// Greater or equal (>=)
    Ge,

    /// Less than (<)
    Lt,

    /// Less or equal (<=)
    Le,

    /// End of file
    EoF,
}
|
|
|
|
struct Lexer<'a> {
|
|
code: Peekable<Chars<'a>>,
|
|
}
|
|
|
|
impl<'a> Lexer<'a> {
|
|
fn new(code: &'a str) -> Self {
|
|
let code = code.chars().peekable();
|
|
Self { code }
|
|
}
|
|
|
|
fn lex(&mut self) -> Vec<Token> {
|
|
let mut tokens = Vec::new();
|
|
|
|
while let Some(ch) = self.next() {
|
|
match ch {
|
|
// Skip whitespace
|
|
' ' => (),
|
|
|
|
// Lex numbers
|
|
'0'..='9' => {
|
|
let mut sval = String::from(ch);
|
|
|
|
// Do as long as a next char exists and it is a numeric char
|
|
while let Some(ch) = self.peek() {
|
|
// The next char is verified to be Some, so unwrap is safe
|
|
match ch {
|
|
// Underscore is a separator, so remove it but don't add to number
|
|
'_' => {
|
|
self.next().unwrap();
|
|
}
|
|
'0'..='9' => {
|
|
sval.push(self.next().unwrap());
|
|
}
|
|
// Next char is not a number, so stop and finish the number token
|
|
_ => break,
|
|
}
|
|
}
|
|
|
|
// TODO: We only added numeric chars to the string, but the conversion could still fail
|
|
tokens.push(Token::I64(sval.parse().unwrap()));
|
|
}
|
|
|
|
'>' if matches!(self.peek(), Some('>')) => {
|
|
self.next();
|
|
tokens.push(Token::Shr);
|
|
}
|
|
'<' if matches!(self.peek(), Some('<')) => {
|
|
self.next();
|
|
tokens.push(Token::Shl);
|
|
}
|
|
'=' if matches!(self.peek(), Some('=')) => {
|
|
self.next();
|
|
tokens.push(Token::Equ);
|
|
}
|
|
'!' if matches!(self.peek(), Some('=')) => {
|
|
self.next();
|
|
tokens.push(Token::Neq);
|
|
}
|
|
'<' if matches!(self.peek(), Some('=')) => {
|
|
self.next();
|
|
tokens.push(Token::Le);
|
|
}
|
|
'>' if matches!(self.peek(), Some('=')) => {
|
|
self.next();
|
|
tokens.push(Token::Ge);
|
|
}
|
|
|
|
'+' => tokens.push(Token::Add),
|
|
'-' => tokens.push(Token::Sub),
|
|
'*' => tokens.push(Token::Mul),
|
|
'/' => tokens.push(Token::Div),
|
|
'%' => tokens.push(Token::Mod),
|
|
'|' => tokens.push(Token::BOr),
|
|
'&' => tokens.push(Token::BAnd),
|
|
'^' => tokens.push(Token::BXor),
|
|
'(' => tokens.push(Token::LParen),
|
|
')' => tokens.push(Token::RParen),
|
|
'<' => tokens.push(Token::Lt),
|
|
'>' => tokens.push(Token::Gt),
|
|
|
|
'a'..='z' | 'A'..='Z' | '_' => {
|
|
let mut ident = String::from(ch);
|
|
|
|
// Do as long as a next char exists and it is a valid ident char
|
|
while let Some('a'..='z' | 'A'..='Z' | '_' | '0'..='9') = self.peek() {
|
|
// The next char is verified to be Some, so unwrap is safe
|
|
ident.push(self.next().unwrap());
|
|
}
|
|
|
|
match ident.as_str() {
|
|
"true" => tokens.push(Token::I64(1)),
|
|
"false" => tokens.push(Token::I64(0)),
|
|
_ => panic!("Lexer encountered unknown ident: '{}'", ident),
|
|
}
|
|
}
|
|
|
|
//TODO: Don't panic, keep calm
|
|
_ => panic!("Lexer encountered unexpected char: '{}'", ch),
|
|
}
|
|
}
|
|
|
|
tokens
|
|
}
|
|
|
|
/// Advance to next character and return the removed char
|
|
fn next(&mut self) -> Option<char> {
|
|
self.code.next()
|
|
}
|
|
|
|
/// Get the next character without removing it
|
|
fn peek(&mut self) -> Option<char> {
|
|
self.code.peek().copied()
|
|
}
|
|
}
|
|
|
|
/// Lex the provided code into a Token Buffer
|
|
///
|
|
/// TODO: Don't panic and implement error handling using Result
|
|
pub fn lex(code: &str) -> Vec<Token> {
|
|
let mut lexer = Lexer::new(code);
|
|
lexer.lex()
|
|
}
|
|
|
|
impl Token {
|
|
pub fn try_to_binop(&self) -> Option<BinOpType> {
|
|
Some(match self {
|
|
Token::Add => BinOpType::Add,
|
|
Token::Sub => BinOpType::Sub,
|
|
|
|
Token::Mul => BinOpType::Mul,
|
|
Token::Div => BinOpType::Div,
|
|
Token::Mod => BinOpType::Mod,
|
|
|
|
Token::BAnd => BinOpType::BAnd,
|
|
Token::BOr => BinOpType::BOr,
|
|
Token::BXor => BinOpType::BXor,
|
|
|
|
Token::Shl => BinOpType::Shl,
|
|
Token::Shr => BinOpType::Shr,
|
|
|
|
Token::Equ => BinOpType::Equ,
|
|
Token::Neq => BinOpType::Neq,
|
|
|
|
Token::Gt => BinOpType::Gt,
|
|
Token::Ge => BinOpType::Ge,
|
|
Token::Lt => BinOpType::Lt,
|
|
Token::Le => BinOpType::Le,
|
|
|
|
_ => return None,
|
|
})
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::{lex, Token};

    /// Integer literals mixed with arithmetic, bitwise and shift operators,
    /// with and without spaces between them
    #[test]
    fn test_lexer() {
        let code = "33 +5*2 + 4456467*2334+3 % - / << ^ | & >>";
        let expected = vec![
            Token::I64(33),
            Token::Add,
            Token::I64(5),
            Token::Mul,
            Token::I64(2),
            Token::Add,
            Token::I64(4456467),
            Token::Mul,
            Token::I64(2334),
            Token::Add,
            Token::I64(3),
            Token::Mod,
            Token::Sub,
            Token::Div,
            Token::Shl,
            Token::BXor,
            Token::BOr,
            Token::BAnd,
            Token::Shr,
        ];

        let actual = lex(code);
        assert_eq!(expected, actual);
    }

    /// One- and two-char comparison operators must not be confused with each
    /// other or with the shift operators
    #[test]
    fn test_lexer_comparisons() {
        let code = "== != > >= < <= >>= <<";
        let expected = vec![
            Token::Equ,
            Token::Neq,
            Token::Gt,
            Token::Ge,
            Token::Lt,
            Token::Le,
            // `>>=` is not an operator of its own: `>>` followed by `=`
            // would panic, so only check up to the shift + a valid follower
            Token::Shr,
            Token::Equ,
            Token::Shl,
        ];

        // `>>=` lexes as `>>` and then needs `==` to be valid, so feed `>>==`
        let actual = lex(&code.replace(">>=", ">>=="));
        assert_eq!(expected, actual);
    }

    /// Digit separators, the boolean keywords and parentheses
    #[test]
    fn test_lexer_literals_and_parens() {
        let code = "1_000 (true) false";
        let expected = vec![
            Token::I64(1000),
            Token::LParen,
            Token::I64(1),
            Token::RParen,
            Token::I64(0),
        ];

        let actual = lex(code);
        assert_eq!(expected, actual);
    }
}
|