Refactor lexer match loop
parent 726dd62794
commit 926bdeb2dc

src/lexer.rs: 166 changed lines
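The refactor stores the character being lexed on the `Lexer` (new `current_char` and `tokens` fields) so the main loop can dispatch on a `(current_char, peeked_char)` tuple instead of per-arm `if matches!(self.peek(), ...)` guards, and the repeated push-and-advance blocks collapse into the new `push_tok`/`push_tok_consume` helpers. A minimal, self-contained sketch of that dispatch pattern, with a simplified `Tok` enum and `Sketch` struct as stand-ins for this repo's `Token` type and `T![...]` macro (not the crate's actual code):

use std::iter::Peekable;
use std::str::Chars;

// Simplified stand-in for the crate's Token type and T![...] macro.
#[derive(Debug, PartialEq)]
enum Tok { Shl, Shr, EqEq, Lt, Gt, Eq }

struct Sketch<'a> {
    code: Peekable<Chars<'a>>,
    tokens: Vec<Tok>,
}

impl<'a> Sketch<'a> {
    /// Consume and return the next char, or '\0' at end of input.
    fn next_ch(&mut self) -> char {
        self.code.next().unwrap_or('\0')
    }

    /// Look at the upcoming char without consuming it.
    fn peek_ch(&mut self) -> char {
        *self.code.peek().unwrap_or(&'\0')
    }

    /// Push `tok` and also consume the peeked char (two-char tokens).
    fn push_tok_consume(&mut self, tok: Tok) {
        self.next_ch();
        self.tokens.push(tok);
    }

    fn lex(mut self) -> Vec<Tok> {
        loop {
            // Two-char arms match both tuple slots; one-char arms ignore
            // the peeked char with `_`. Arm order matters: ('<', '<')
            // must come before the catch-all ('<', _).
            match (self.next_ch(), self.peek_ch()) {
                ('\0', _) => break,
                (' ' | '\t' | '\n' | '\r', _) => (),
                ('<', '<') => self.push_tok_consume(Tok::Shl),
                ('>', '>') => self.push_tok_consume(Tok::Shr),
                ('=', '=') => self.push_tok_consume(Tok::EqEq),
                ('<', _) => self.tokens.push(Tok::Lt),
                ('>', _) => self.tokens.push(Tok::Gt),
                ('=', _) => self.tokens.push(Tok::Eq),
                (ch, _) => panic!("unexpected char {ch:?}"),
            }
        }
        self.tokens
    }
}

fn main() {
    let lexer = Sketch { code: "<< = >>".chars().peekable(), tokens: Vec::new() };
    assert_eq!(lexer.lex(), vec![Tok::Shl, Tok::Eq, Tok::Shr]);
}

Since `lex` now takes `mut self` by value, the finished lexer can hand back its accumulated `self.tokens` directly without cloning.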
@@ -20,118 +20,100 @@ pub enum LexErr {
 
 /// Lex the provided code into a Token Buffer
 pub fn lex(code: &str) -> Result<Vec<Token>, LexErr> {
-    let mut lexer = Lexer::new(code);
+    let lexer = Lexer::new(code);
     lexer.lex()
 }
 
 struct Lexer<'a> {
     /// The source code text as an iterator over the chars
     code: Peekable<Chars<'a>>,
+    /// The lexed tokens
+    tokens: Vec<Token>,
+    /// The source code character that is currently being lexed
+    current_char: char,
 }
 
 impl<'a> Lexer<'a> {
     fn new(code: &'a str) -> Self {
         let code = code.chars().peekable();
-        Self { code }
+        let tokens = Vec::new();
+        let current_char = '\0';
+        Self {
+            code,
+            tokens,
+            current_char,
+        }
     }
 
-    fn lex(&mut self) -> Result<Vec<Token>, LexErr> {
-        let mut tokens = Vec::new();
+    fn lex(mut self) -> Result<Vec<Token>, LexErr> {
 
         loop {
-            match self.next() {
+            self.current_char = self.next();
+            match (self.current_char, self.peek()) {
                 // Stop lexing at EOF
-                '\0' => break,
+                ('\0', _) => break,
 
                 // Skip whitespace
-                ' ' | '\t' | '\n' | '\r' => (),
+                (' ' | '\t' | '\n' | '\r', _) => (),
 
                 // Line comment. Consume every char until linefeed (next line)
-                '/' if matches!(self.peek(), '/') => while !matches!(self.next(), '\n' | '\0') {},
+                ('/', '/') => while !matches!(self.next(), '\n' | '\0') {},
 
                 // Double character tokens
-                '>' if matches!(self.peek(), '>') => {
-                    self.next();
-                    tokens.push(T![>>]);
-                }
-                '<' if matches!(self.peek(), '<') => {
-                    self.next();
-                    tokens.push(T![<<]);
-                }
-                '=' if matches!(self.peek(), '=') => {
-                    self.next();
-                    tokens.push(T![==]);
-                }
-                '!' if matches!(self.peek(), '=') => {
-                    self.next();
-                    tokens.push(T![!=]);
-                }
-                '<' if matches!(self.peek(), '=') => {
-                    self.next();
-                    tokens.push(T![<=]);
-                }
-                '>' if matches!(self.peek(), '=') => {
-                    self.next();
-                    tokens.push(T![>=]);
-                }
-                '<' if matches!(self.peek(), '-') => {
-                    self.next();
-                    tokens.push(T![<-]);
-                }
-                '&' if matches!(self.peek(), '&') => {
-                    self.next();
-                    tokens.push(T![&&]);
-                }
-                '|' if matches!(self.peek(), '|') => {
-                    self.next();
-                    tokens.push(T![||]);
-                }
+                ('>', '>') => self.push_tok_consume(T![>>]),
+                ('<', '<') => self.push_tok_consume(T![<<]),
+                ('=', '=') => self.push_tok_consume(T![==]),
+                ('!', '=') => self.push_tok_consume(T![!=]),
+                ('<', '=') => self.push_tok_consume(T![<=]),
+                ('>', '=') => self.push_tok_consume(T![>=]),
+                ('<', '-') => self.push_tok_consume(T![<-]),
+                ('&', '&') => self.push_tok_consume(T![&&]),
+                ('|', '|') => self.push_tok_consume(T![||]),
 
                 // Single character tokens
-                ';' => tokens.push(T![;]),
-                '+' => tokens.push(T![+]),
-                '-' => tokens.push(T![-]),
-                '*' => tokens.push(T![*]),
-                '/' => tokens.push(T![/]),
-                '%' => tokens.push(T![%]),
-                '|' => tokens.push(T![|]),
-                '&' => tokens.push(T![&]),
-                '^' => tokens.push(T![^]),
-                '(' => tokens.push(T!['(']),
-                ')' => tokens.push(T![')']),
-                '~' => tokens.push(T![~]),
-                '<' => tokens.push(T![<]),
-                '>' => tokens.push(T![>]),
-                '=' => tokens.push(T![=]),
-                '{' => tokens.push(T!['{']),
-                '}' => tokens.push(T!['}']),
-                '!' => tokens.push(T![!]),
-                '[' => tokens.push(T!['[']),
-                ']' => tokens.push(T![']']),
+                (';', _) => self.push_tok(T![;]),
+                ('+', _) => self.push_tok(T![+]),
+                ('-', _) => self.push_tok(T![-]),
+                ('*', _) => self.push_tok(T![*]),
+                ('/', _) => self.push_tok(T![/]),
+                ('%', _) => self.push_tok(T![%]),
+                ('|', _) => self.push_tok(T![|]),
+                ('&', _) => self.push_tok(T![&]),
+                ('^', _) => self.push_tok(T![^]),
+                ('(', _) => self.push_tok(T!['(']),
+                (')', _) => self.push_tok(T![')']),
+                ('~', _) => self.push_tok(T![~]),
+                ('<', _) => self.push_tok(T![<]),
+                ('>', _) => self.push_tok(T![>]),
+                ('=', _) => self.push_tok(T![=]),
+                ('{', _) => self.push_tok(T!['{']),
+                ('}', _) => self.push_tok(T!['}']),
+                ('!', _) => self.push_tok(T![!]),
+                ('[', _) => self.push_tok(T!['[']),
+                (']', _) => self.push_tok(T![']']),
 
                 // Special tokens with variable length
 
                 // Lex multiple characters together as numbers
-                ch @ '0'..='9' => tokens.push(self.lex_number(ch)?),
+                ('0'..='9', _) => self.lex_number()?,
 
                 // Lex multiple characters together as a string
-                '"' => tokens.push(self.lex_str()?),
+                ('"', _) => self.lex_str()?,
 
                 // Lex multiple characters together as identifier
-                ch @ ('a'..='z' | 'A'..='Z' | '_') => tokens.push(self.lex_identifier(ch)?),
+                ('a'..='z' | 'A'..='Z' | '_', _) => self.lex_identifier()?,
 
-                ch => Err(LexErr::UnexpectedChar(ch))?,
+                (ch, _) => Err(LexErr::UnexpectedChar(ch))?,
             }
         }
 
-        Ok(tokens)
+        Ok(self.tokens)
     }
 
-    /// Lex multiple characters as a number until encountering a non-numeric char. This includes
-    /// the first character
-    fn lex_number(&mut self, first_char: char) -> Result<Token, LexErr> {
+    /// Lex multiple characters as a number until encountering a non-numeric char. The
+    /// successfully lexed i64 literal token is appended to the stored tokens.
+    fn lex_number(&mut self) -> Result<(), LexErr> {
         // String representation of the integer value
-        let mut sval = String::from(first_char);
+        let mut sval = String::from(self.current_char);
 
         // Do as long as a next char exists and it is a numeric char
        loop {
@@ -151,11 +133,15 @@ impl<'a> Lexer<'a> {
 
         // Try to convert the string representation of the value to i64
         let i64val = sval.parse().map_err(|_| LexErr::NumericParse(sval))?;
-        Ok(T![i64(i64val)])
+
+        self.push_tok(T![i64(i64val)]);
+
+        Ok(())
     }
 
-    /// Lex characters as a string until encountering an unescaped closing double quote char '"'
-    fn lex_str(&mut self) -> Result<Token, LexErr> {
+    /// Lex characters as a string until encountering an unescaped closing double quote char '"'.
+    /// The successfully lexed string literal token is appended to the stored tokens.
+    fn lex_str(&mut self) -> Result<(), LexErr> {
         // Opening " was consumed in match
 
         let mut text = String::new();
@@ -185,12 +171,15 @@ impl<'a> Lexer<'a> {
         // Consume closing "
         self.next();
 
-        Ok(T![str(text)])
+        self.push_tok(T![str(text)]);
+
+        Ok(())
     }
 
-    /// Lex characters from the text as an identifier. This includes the first character passed in
-    fn lex_identifier(&mut self, first_char: char) -> Result<Token, LexErr> {
-        let mut ident = String::from(first_char);
+    /// Lex characters from the text as an identifier. The successfully lexed ident or keyword
+    /// token is appended to the stored tokens.
+    fn lex_identifier(&mut self) -> Result<(), LexErr> {
+        let mut ident = String::from(self.current_char);
 
         // Do as long as a next char exists and it is a valid char for an identifier
         loop {
@@ -215,7 +204,20 @@ impl<'a> Lexer<'a> {
             _ => T![ident(ident)],
         };
 
-        Ok(token)
+        self.push_tok(token);
+
+        Ok(())
+    }
+
+    /// Push the given token into the stored tokens
+    fn push_tok(&mut self, token: Token) {
+        self.tokens.push(token);
+    }
+
+    /// Same as `push_tok` but also consumes the next char, removing it from the code iter
+    fn push_tok_consume(&mut self, token: Token) {
+        self.next();
+        self.tokens.push(token);
     }
 
     /// Advance to next character and return the removed char