Implement string interning

This commit is contained in:
Daniel M 2022-02-04 00:23:43 +01:00
parent 4dbc3adfd5
commit d4c6f3d5dc
5 changed files with 94 additions and 43 deletions

View File

@ -1,4 +1,4 @@
use std::rc::Rc;
use crate::stringstore::{StringStore, Sid};
/// Types for binary operators
#[derive(Debug, PartialEq, Eq, Clone)]
@ -81,9 +81,9 @@ pub enum Expression {
/// Integer literal (64-bit)
I64(i64),
/// String literal
String(Rc<String>),
String(Sid),
/// Variable
Var(String),
Var(Sid),
/// Binary operation. Consists of type, left hand side and right hand side
BinOp(BinOpType, Box<Expression>, Box<Expression>),
/// Unary operation. Consists of type and operand
@ -120,6 +120,12 @@ pub enum Statement {
pub type BlockScope = Vec<Statement>;
/// Result of parsing a program: the parsed statements together with the string store that
/// the `Sid` values inside those statements refer to.
#[derive(Clone, Default)]
pub struct Ast {
/// Store of the strings that were interned while parsing (string literals and identifiers)
pub stringstore: StringStore,
/// Top-level block of statements of the program
pub main: BlockScope,
}
impl BinOpType {
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.

View File

@ -1,15 +1,13 @@
use std::{fmt::Display, rc::Rc};
use crate::{
ast::{BlockScope, BinOpType, Expression, If, Statement, UnOpType},
lexer::lex,
parser::parse,
parser::parse, stringstore::{Sid, StringStore},
};
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Value {
I64(i64),
String(Rc<String>),
String(Sid),
}
#[derive(Default)]
@ -17,7 +15,9 @@ pub struct Interpreter {
capture_output: bool,
output: Vec<Value>,
// Variable table stores the runtime values of variables
vartable: Vec<(String, Value)>,
vartable: Vec<(Sid, Value)>,
stringstore: StringStore,
}
impl Interpreter {
@ -33,7 +33,7 @@ impl Interpreter {
&self.output
}
fn get_var(&self, name: &str) -> Option<Value> {
fn get_var(&self, name: Sid) -> Option<Value> {
self.vartable
.iter()
.rev()
@ -41,7 +41,7 @@ impl Interpreter {
.map(|it| it.1.clone())
}
fn get_var_mut(&mut self, name: &str) -> Option<&mut Value> {
fn get_var_mut(&mut self, name: Sid) -> Option<&mut Value> {
self.vartable
.iter_mut()
.rev()
@ -57,13 +57,15 @@ impl Interpreter {
let ast = parse(tokens);
if print_ast {
println!("{:#?}", ast);
println!("{:#?}", ast.main);
}
self.run(&ast);
self.stringstore = ast.stringstore;
self.run_block(&ast.main);
}
pub fn run(&mut self, prog: &BlockScope) {
pub fn run_block(&mut self, prog: &BlockScope) {
let vartable_len = self.vartable.len();
for stmt in prog {
match stmt {
@ -78,7 +80,7 @@ impl Interpreter {
break;
}
self.run(&looop.body);
self.run_block(&looop.body);
if let Some(adv) = &looop.advancement {
self.resolve_expr(&adv);
@ -92,7 +94,7 @@ impl Interpreter {
if self.capture_output {
self.output.push(result)
} else {
print!("{}", result);
self.print_value(&result);
}
}
@ -102,9 +104,9 @@ impl Interpreter {
body_false,
}) => {
if matches!(self.resolve_expr(condition), Value::I64(0)) {
self.run(body_false);
self.run_block(body_false);
} else {
self.run(body_true);
self.run_block(body_true);
}
}
}
@ -119,14 +121,14 @@ impl Interpreter {
Expression::String(text) => Value::String(text.clone()),
Expression::BinOp(bo, lhs, rhs) => self.resolve_binop(bo, lhs, rhs),
Expression::UnOp(uo, operand) => self.resolve_unop(uo, operand),
Expression::Var(name) => self.resolve_var(name),
Expression::Var(name) => self.resolve_var(*name),
}
}
fn resolve_var(&mut self, name: &str) -> Value {
fn resolve_var(&mut self, name: Sid) -> Value {
match self.get_var(name) {
Some(val) => val.clone(),
None => panic!("Variable '{}' used but not declared", name),
None => panic!("Variable '{}' used but not declared", self.stringstore.lookup(name).unwrap()),
}
}
@ -150,7 +152,7 @@ impl Interpreter {
return rhs;
}
(BinOpType::Assign, Expression::Var(name)) => {
match self.get_var_mut(name) {
match self.get_var_mut(*name) {
Some(val) => *val = rhs.clone(),
None => panic!("Runtime Error: Trying to assign value to undeclared variable"),
}
@ -187,15 +189,14 @@ impl Interpreter {
_ => panic!("Value types are not compatible"),
}
}
}
impl Display for Value {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Value::I64(val) => write!(f, "{}", val),
Value::String(text) => write!(f, "{}", text),
fn print_value(&self, val: &Value) {
match val {
Value::I64(val) => print!("{}", val),
Value::String(text) => print!("{}", self.stringstore.lookup(*text).unwrap()),
}
}
}
#[cfg(test)]

View File

@ -3,6 +3,7 @@ pub mod interpreter;
pub mod lexer;
pub mod parser;
pub mod token;
pub mod stringstore;
#[cfg(test)]
mod tests {
@ -94,3 +95,4 @@ mod tests {
assert_eq!(interpreter.output(), &expected_output);
}
}

View File

@ -1,28 +1,42 @@
use std::iter::Peekable;
use crate::ast::*;
use crate::stringstore::StringStore;
use crate::token::Token;
/// Parse the given tokens into an abstract syntax tree
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> BlockScope {
let mut parser = Parser::new(tokens);
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
let parser = Parser::new(tokens);
parser.parse()
}
struct Parser<T: Iterator<Item = Token>> {
tokens: Peekable<T>,
stringstore: StringStore,
}
impl<T: Iterator<Item = Token>> Parser<T> {
/// Create a new parser to parse the given Token Stream
fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
pub fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
let tokens = tokens.into_iter().peekable();
Self { tokens }
let stringstore = StringStore::new();
Self {
tokens,
stringstore,
}
}
/// Consume the parser and turn the whole token stream into an [`Ast`].
///
/// The top-level statements are parsed as one scoped block. The string store that was
/// filled with the interned literals and identifiers during parsing is moved into the
/// returned `Ast`.
pub fn parse(mut self) -> Ast {
    // Parse first: this is what populates `self.stringstore`.
    let main = self.parse_scoped_block();
    let stringstore = self.stringstore;
    Ast { stringstore, main }
}
/// Parse tokens into an abstract syntax tree. This will continuously parse statements until
/// encountering end-of-file or a block end '}' .
fn parse(&mut self) -> BlockScope {
fn parse_scoped_block(&mut self) -> BlockScope {
let mut prog = Vec::new();
loop {
@ -86,7 +100,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
panic!("Error lexing if: Expected '{{'")
}
let body_true = self.parse();
let body_true = self.parse_scoped_block();
if !matches!(self.next(), Token::RBraces) {
panic!("Error lexing if: Expected '}}'")
@ -101,7 +115,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
panic!("Error lexing if: Expected '{{'")
}
body_false = self.parse();
body_false = self.parse_scoped_block();
if !matches!(self.next(), Token::RBraces) {
panic!("Error lexing if: Expected '}}'")
@ -128,7 +142,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
match self.next() {
Token::LBraces => {
body = self.parse();
body = self.parse_scoped_block();
}
Token::Semicolon => {
@ -138,7 +152,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
panic!("Error lexing loop: Expected '{{'")
}
body = self.parse();
body = self.parse_scoped_block();
}
_ => panic!("Error lexing loop: Expected ';' or '{{'"),
@ -196,9 +210,9 @@ impl<T: Iterator<Item = Token>> Parser<T> {
Token::I64(val) => Expression::I64(val),
// Literal String
Token::String(text) => Expression::String(text.into()),
Token::String(text) => Expression::String(self.stringstore.intern_or_lookup(&text)),
Token::Ident(name) => Expression::Var(name),
Token::Ident(name) => Expression::Var(self.stringstore.intern_or_lookup(&name)),
// Parentheses grouping
Token::LParen => {
@ -248,10 +262,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
#[cfg(test)]
mod tests {
use super::{parse, BinOpType, Expression};
use crate::{
parser::Statement,
token::Token,
};
use crate::{parser::Statement, token::Token};
#[test]
fn test_parser() {
@ -287,6 +298,6 @@ mod tests {
let expected = vec![expected];
let actual = parse(tokens);
assert_eq!(expected, actual);
assert_eq!(expected, actual.main);
}
}

31
src/stringstore.rs Normal file
View File

@ -0,0 +1,31 @@
use std::collections::HashMap;
/// Identifier of a string interned in a [`StringStore`].
///
/// A `Sid` is a cheap `Copy` handle wrapping the index under which the string was stored.
/// Two ids obtained from the same store compare equal exactly when they refer to the same
/// stored entry. `Hash` is derived so ids can be used directly as `HashMap`/`HashSet` keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Sid(usize);
#[derive(Clone, Default)]
pub struct StringStore {
strings: Vec<String>,
sids: HashMap<String, Sid>,
}
impl StringStore {
pub fn new() -> Self {
Self { strings: Vec::new(), sids: HashMap::new() }
}
pub fn intern_or_lookup(&mut self, text: &str) -> Sid {
self.sids.get(text).copied().unwrap_or_else(|| {
let sid = Sid(self.strings.len());
self.strings.push(text.to_string());
self.sids.insert(text.to_string(), sid);
sid
})
}
pub fn lookup(&self, sid: Sid) -> Option<&String> {
self.strings.get(sid.0)
}
}