Implement string interning

This commit is contained in:
Daniel M 2022-02-04 00:23:43 +01:00
parent 4dbc3adfd5
commit d4c6f3d5dc
5 changed files with 94 additions and 43 deletions

View File

@ -1,4 +1,4 @@
use std::rc::Rc; use crate::stringstore::{StringStore, Sid};
/// Types for binary operators /// Types for binary operators
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
@ -81,9 +81,9 @@ pub enum Expression {
/// Integer literal (64-bit) /// Integer literal (64-bit)
I64(i64), I64(i64),
/// String literal /// String literal
String(Rc<String>), String(Sid),
/// Variable /// Variable
Var(String), Var(Sid),
/// Binary operation. Consists of type, left hand side and right hand side /// Binary operation. Consists of type, left hand side and right hand side
BinOp(BinOpType, Box<Expression>, Box<Expression>), BinOp(BinOpType, Box<Expression>, Box<Expression>),
/// Unary operation. Consists of type and operand /// Unary operation. Consists of type and operand
@ -120,6 +120,12 @@ pub enum Statement {
pub type BlockScope = Vec<Statement>; pub type BlockScope = Vec<Statement>;
/// Result of parsing a program: the top-level block plus the string store
/// that the `Sid`s inside that block refer to.
#[derive(Clone, Default)]
pub struct Ast {
/// Interned strings; the parser fills this while it builds `main`
/// (string literals and identifiers are stored here and referenced by `Sid`).
pub stringstore: StringStore,
/// Statements of the outermost block of the program.
pub main: BlockScope,
}
impl BinOpType { impl BinOpType {
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding. /// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add. /// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.

View File

@ -1,15 +1,13 @@
use std::{fmt::Display, rc::Rc};
use crate::{ use crate::{
ast::{BlockScope, BinOpType, Expression, If, Statement, UnOpType}, ast::{BlockScope, BinOpType, Expression, If, Statement, UnOpType},
lexer::lex, lexer::lex,
parser::parse, parser::parse, stringstore::{Sid, StringStore},
}; };
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub enum Value { pub enum Value {
I64(i64), I64(i64),
String(Rc<String>), String(Sid),
} }
#[derive(Default)] #[derive(Default)]
@ -17,7 +15,9 @@ pub struct Interpreter {
capture_output: bool, capture_output: bool,
output: Vec<Value>, output: Vec<Value>,
// Variable table stores the runtime values of variables // Variable table stores the runtime values of variables
vartable: Vec<(String, Value)>, vartable: Vec<(Sid, Value)>,
stringstore: StringStore,
} }
impl Interpreter { impl Interpreter {
@ -33,7 +33,7 @@ impl Interpreter {
&self.output &self.output
} }
fn get_var(&self, name: &str) -> Option<Value> { fn get_var(&self, name: Sid) -> Option<Value> {
self.vartable self.vartable
.iter() .iter()
.rev() .rev()
@ -41,7 +41,7 @@ impl Interpreter {
.map(|it| it.1.clone()) .map(|it| it.1.clone())
} }
fn get_var_mut(&mut self, name: &str) -> Option<&mut Value> { fn get_var_mut(&mut self, name: Sid) -> Option<&mut Value> {
self.vartable self.vartable
.iter_mut() .iter_mut()
.rev() .rev()
@ -57,13 +57,15 @@ impl Interpreter {
let ast = parse(tokens); let ast = parse(tokens);
if print_ast { if print_ast {
println!("{:#?}", ast); println!("{:#?}", ast.main);
} }
self.run(&ast); self.stringstore = ast.stringstore;
self.run_block(&ast.main);
} }
pub fn run(&mut self, prog: &BlockScope) { pub fn run_block(&mut self, prog: &BlockScope) {
let vartable_len = self.vartable.len(); let vartable_len = self.vartable.len();
for stmt in prog { for stmt in prog {
match stmt { match stmt {
@ -78,7 +80,7 @@ impl Interpreter {
break; break;
} }
self.run(&looop.body); self.run_block(&looop.body);
if let Some(adv) = &looop.advancement { if let Some(adv) = &looop.advancement {
self.resolve_expr(&adv); self.resolve_expr(&adv);
@ -92,7 +94,7 @@ impl Interpreter {
if self.capture_output { if self.capture_output {
self.output.push(result) self.output.push(result)
} else { } else {
print!("{}", result); self.print_value(&result);
} }
} }
@ -102,9 +104,9 @@ impl Interpreter {
body_false, body_false,
}) => { }) => {
if matches!(self.resolve_expr(condition), Value::I64(0)) { if matches!(self.resolve_expr(condition), Value::I64(0)) {
self.run(body_false); self.run_block(body_false);
} else { } else {
self.run(body_true); self.run_block(body_true);
} }
} }
} }
@ -119,14 +121,14 @@ impl Interpreter {
Expression::String(text) => Value::String(text.clone()), Expression::String(text) => Value::String(text.clone()),
Expression::BinOp(bo, lhs, rhs) => self.resolve_binop(bo, lhs, rhs), Expression::BinOp(bo, lhs, rhs) => self.resolve_binop(bo, lhs, rhs),
Expression::UnOp(uo, operand) => self.resolve_unop(uo, operand), Expression::UnOp(uo, operand) => self.resolve_unop(uo, operand),
Expression::Var(name) => self.resolve_var(name), Expression::Var(name) => self.resolve_var(*name),
} }
} }
fn resolve_var(&mut self, name: &str) -> Value { fn resolve_var(&mut self, name: Sid) -> Value {
match self.get_var(name) { match self.get_var(name) {
Some(val) => val.clone(), Some(val) => val.clone(),
None => panic!("Variable '{}' used but not declared", name), None => panic!("Variable '{}' used but not declared", self.stringstore.lookup(name).unwrap()),
} }
} }
@ -150,7 +152,7 @@ impl Interpreter {
return rhs; return rhs;
} }
(BinOpType::Assign, Expression::Var(name)) => { (BinOpType::Assign, Expression::Var(name)) => {
match self.get_var_mut(name) { match self.get_var_mut(*name) {
Some(val) => *val = rhs.clone(), Some(val) => *val = rhs.clone(),
None => panic!("Runtime Error: Trying to assign value to undeclared variable"), None => panic!("Runtime Error: Trying to assign value to undeclared variable"),
} }
@ -187,15 +189,14 @@ impl Interpreter {
_ => panic!("Value types are not compatible"), _ => panic!("Value types are not compatible"),
} }
} }
fn print_value(&self, val: &Value) {
match val {
Value::I64(val) => print!("{}", val),
Value::String(text) => print!("{}", self.stringstore.lookup(*text).unwrap()),
}
} }
impl Display for Value {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Value::I64(val) => write!(f, "{}", val),
Value::String(text) => write!(f, "{}", text),
}
}
} }
#[cfg(test)] #[cfg(test)]

View File

@ -3,6 +3,7 @@ pub mod interpreter;
pub mod lexer; pub mod lexer;
pub mod parser; pub mod parser;
pub mod token; pub mod token;
pub mod stringstore;
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
@ -94,3 +95,4 @@ mod tests {
assert_eq!(interpreter.output(), &expected_output); assert_eq!(interpreter.output(), &expected_output);
} }
} }

View File

@ -1,28 +1,42 @@
use std::iter::Peekable; use std::iter::Peekable;
use crate::ast::*; use crate::ast::*;
use crate::stringstore::StringStore;
use crate::token::Token; use crate::token::Token;
/// Parse the given tokens into an abstract syntax tree /// Parse the given tokens into an abstract syntax tree
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> BlockScope { pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
let mut parser = Parser::new(tokens); let parser = Parser::new(tokens);
parser.parse() parser.parse()
} }
struct Parser<T: Iterator<Item = Token>> { struct Parser<T: Iterator<Item = Token>> {
tokens: Peekable<T>, tokens: Peekable<T>,
stringstore: StringStore,
} }
impl<T: Iterator<Item = Token>> Parser<T> { impl<T: Iterator<Item = Token>> Parser<T> {
/// Create a new parser to parse the given Token Stream /// Create a new parser to parse the given Token Stream
fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self { pub fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
let tokens = tokens.into_iter().peekable(); let tokens = tokens.into_iter().peekable();
Self { tokens } let stringstore = StringStore::new();
Self {
tokens,
stringstore,
}
}
pub fn parse(mut self) -> Ast {
let main = self.parse_scoped_block();
Ast {
main,
stringstore: self.stringstore,
}
} }
/// Parse tokens into an abstract syntax tree. This will continuously parse statements until /// Parse tokens into an abstract syntax tree. This will continuously parse statements until
/// encountering end-of-file or a block end '}' . /// encountering end-of-file or a block end '}' .
fn parse(&mut self) -> BlockScope { fn parse_scoped_block(&mut self) -> BlockScope {
let mut prog = Vec::new(); let mut prog = Vec::new();
loop { loop {
@ -86,7 +100,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
panic!("Error lexing if: Expected '{{'") panic!("Error lexing if: Expected '{{'")
} }
let body_true = self.parse(); let body_true = self.parse_scoped_block();
if !matches!(self.next(), Token::RBraces) { if !matches!(self.next(), Token::RBraces) {
panic!("Error lexing if: Expected '}}'") panic!("Error lexing if: Expected '}}'")
@ -101,7 +115,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
panic!("Error lexing if: Expected '{{'") panic!("Error lexing if: Expected '{{'")
} }
body_false = self.parse(); body_false = self.parse_scoped_block();
if !matches!(self.next(), Token::RBraces) { if !matches!(self.next(), Token::RBraces) {
panic!("Error lexing if: Expected '}}'") panic!("Error lexing if: Expected '}}'")
@ -128,7 +142,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
match self.next() { match self.next() {
Token::LBraces => { Token::LBraces => {
body = self.parse(); body = self.parse_scoped_block();
} }
Token::Semicolon => { Token::Semicolon => {
@ -138,7 +152,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
panic!("Error lexing loop: Expected '{{'") panic!("Error lexing loop: Expected '{{'")
} }
body = self.parse(); body = self.parse_scoped_block();
} }
_ => panic!("Error lexing loop: Expected ';' or '{{'"), _ => panic!("Error lexing loop: Expected ';' or '{{'"),
@ -196,9 +210,9 @@ impl<T: Iterator<Item = Token>> Parser<T> {
Token::I64(val) => Expression::I64(val), Token::I64(val) => Expression::I64(val),
// Literal String // Literal String
Token::String(text) => Expression::String(text.into()), Token::String(text) => Expression::String(self.stringstore.intern_or_lookup(&text)),
Token::Ident(name) => Expression::Var(name), Token::Ident(name) => Expression::Var(self.stringstore.intern_or_lookup(&name)),
// Parentheses grouping // Parentheses grouping
Token::LParen => { Token::LParen => {
@ -248,10 +262,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{parse, BinOpType, Expression}; use super::{parse, BinOpType, Expression};
use crate::{ use crate::{parser::Statement, token::Token};
parser::Statement,
token::Token,
};
#[test] #[test]
fn test_parser() { fn test_parser() {
@ -287,6 +298,6 @@ mod tests {
let expected = vec![expected]; let expected = vec![expected];
let actual = parse(tokens); let actual = parse(tokens);
assert_eq!(expected, actual); assert_eq!(expected, actual.main);
} }
} }

31
src/stringstore.rs Normal file
View File

@ -0,0 +1,31 @@
use std::collections::HashMap;
/// Handle for a string interned in a [`StringStore`].
///
/// A `Sid` is a small, copyable index. It is only meaningful together with the
/// store that handed it out; two equal strings interned in the same store
/// always yield the same `Sid`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Sid(usize);

/// String interner: maps each distinct string to a stable [`Sid`] and back.
#[derive(Debug, Clone, Default)]
pub struct StringStore {
    /// Interned strings, indexed by the numeric value of their `Sid`.
    strings: Vec<String>,
    /// Reverse index from string content to the `Sid` it was assigned.
    sids: HashMap<String, Sid>,
}

impl StringStore {
    /// Creates an empty store. Equivalent to `StringStore::default()`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the `Sid` for `text`, interning it first if it is not yet known.
    ///
    /// Ids are handed out sequentially in order of first insertion, so the
    /// first distinct string gets index 0, the next one index 1, and so on.
    pub fn intern_or_lookup(&mut self, text: &str) -> Sid {
        // Fast path: the string was interned before.
        if let Some(&known) = self.sids.get(text) {
            return known;
        }

        // The next free slot in `strings` becomes the id of the new entry.
        let sid = Sid(self.strings.len());
        let owned = text.to_owned();
        self.strings.push(owned.clone());
        self.sids.insert(owned, sid);
        sid
    }

    /// Resolves `sid` back to its string.
    ///
    /// Returns `None` if `sid` does not index an entry of this store
    /// (for example when it was produced by a different store).
    pub fn lookup(&self, sid: Sid) -> Option<&String> {
        self.strings.get(sid.0)
    }
}