Implement string interning
This commit is contained in:
parent
4dbc3adfd5
commit
d4c6f3d5dc
12
src/ast.rs
12
src/ast.rs
@ -1,4 +1,4 @@
|
||||
use std::rc::Rc;
|
||||
use crate::stringstore::{StringStore, Sid};
|
||||
|
||||
/// Types for binary operators
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
@ -81,9 +81,9 @@ pub enum Expression {
|
||||
/// Integer literal (64-bit)
|
||||
I64(i64),
|
||||
/// String literal
|
||||
String(Rc<String>),
|
||||
String(Sid),
|
||||
/// Variable
|
||||
Var(String),
|
||||
Var(Sid),
|
||||
/// Binary operation. Consists of type, left hand side and right hand side
|
||||
BinOp(BinOpType, Box<Expression>, Box<Expression>),
|
||||
/// Unary operation. Consists of type and operand
|
||||
@ -120,6 +120,12 @@ pub enum Statement {
|
||||
|
||||
pub type BlockScope = Vec<Statement>;
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Ast {
|
||||
pub stringstore: StringStore,
|
||||
pub main: BlockScope,
|
||||
}
|
||||
|
||||
impl BinOpType {
|
||||
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
|
||||
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
|
||||
|
||||
@ -1,15 +1,13 @@
|
||||
use std::{fmt::Display, rc::Rc};
|
||||
|
||||
use crate::{
|
||||
ast::{BlockScope, BinOpType, Expression, If, Statement, UnOpType},
|
||||
lexer::lex,
|
||||
parser::parse,
|
||||
parser::parse, stringstore::{Sid, StringStore},
|
||||
};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub enum Value {
|
||||
I64(i64),
|
||||
String(Rc<String>),
|
||||
String(Sid),
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
@ -17,7 +15,9 @@ pub struct Interpreter {
|
||||
capture_output: bool,
|
||||
output: Vec<Value>,
|
||||
// Variable table stores the runtime values of variables
|
||||
vartable: Vec<(String, Value)>,
|
||||
vartable: Vec<(Sid, Value)>,
|
||||
|
||||
stringstore: StringStore,
|
||||
}
|
||||
|
||||
impl Interpreter {
|
||||
@ -33,7 +33,7 @@ impl Interpreter {
|
||||
&self.output
|
||||
}
|
||||
|
||||
fn get_var(&self, name: &str) -> Option<Value> {
|
||||
fn get_var(&self, name: Sid) -> Option<Value> {
|
||||
self.vartable
|
||||
.iter()
|
||||
.rev()
|
||||
@ -41,7 +41,7 @@ impl Interpreter {
|
||||
.map(|it| it.1.clone())
|
||||
}
|
||||
|
||||
fn get_var_mut(&mut self, name: &str) -> Option<&mut Value> {
|
||||
fn get_var_mut(&mut self, name: Sid) -> Option<&mut Value> {
|
||||
self.vartable
|
||||
.iter_mut()
|
||||
.rev()
|
||||
@ -57,13 +57,15 @@ impl Interpreter {
|
||||
|
||||
let ast = parse(tokens);
|
||||
if print_ast {
|
||||
println!("{:#?}", ast);
|
||||
println!("{:#?}", ast.main);
|
||||
}
|
||||
|
||||
self.run(&ast);
|
||||
self.stringstore = ast.stringstore;
|
||||
|
||||
self.run_block(&ast.main);
|
||||
}
|
||||
|
||||
pub fn run(&mut self, prog: &BlockScope) {
|
||||
pub fn run_block(&mut self, prog: &BlockScope) {
|
||||
let vartable_len = self.vartable.len();
|
||||
for stmt in prog {
|
||||
match stmt {
|
||||
@ -78,7 +80,7 @@ impl Interpreter {
|
||||
break;
|
||||
}
|
||||
|
||||
self.run(&looop.body);
|
||||
self.run_block(&looop.body);
|
||||
|
||||
if let Some(adv) = &looop.advancement {
|
||||
self.resolve_expr(&adv);
|
||||
@ -92,7 +94,7 @@ impl Interpreter {
|
||||
if self.capture_output {
|
||||
self.output.push(result)
|
||||
} else {
|
||||
print!("{}", result);
|
||||
self.print_value(&result);
|
||||
}
|
||||
}
|
||||
|
||||
@ -102,9 +104,9 @@ impl Interpreter {
|
||||
body_false,
|
||||
}) => {
|
||||
if matches!(self.resolve_expr(condition), Value::I64(0)) {
|
||||
self.run(body_false);
|
||||
self.run_block(body_false);
|
||||
} else {
|
||||
self.run(body_true);
|
||||
self.run_block(body_true);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -119,14 +121,14 @@ impl Interpreter {
|
||||
Expression::String(text) => Value::String(text.clone()),
|
||||
Expression::BinOp(bo, lhs, rhs) => self.resolve_binop(bo, lhs, rhs),
|
||||
Expression::UnOp(uo, operand) => self.resolve_unop(uo, operand),
|
||||
Expression::Var(name) => self.resolve_var(name),
|
||||
Expression::Var(name) => self.resolve_var(*name),
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_var(&mut self, name: &str) -> Value {
|
||||
fn resolve_var(&mut self, name: Sid) -> Value {
|
||||
match self.get_var(name) {
|
||||
Some(val) => val.clone(),
|
||||
None => panic!("Variable '{}' used but not declared", name),
|
||||
None => panic!("Variable '{}' used but not declared", self.stringstore.lookup(name).unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
@ -150,7 +152,7 @@ impl Interpreter {
|
||||
return rhs;
|
||||
}
|
||||
(BinOpType::Assign, Expression::Var(name)) => {
|
||||
match self.get_var_mut(name) {
|
||||
match self.get_var_mut(*name) {
|
||||
Some(val) => *val = rhs.clone(),
|
||||
None => panic!("Runtime Error: Trying to assign value to undeclared variable"),
|
||||
}
|
||||
@ -187,15 +189,14 @@ impl Interpreter {
|
||||
_ => panic!("Value types are not compatible"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Value {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Value::I64(val) => write!(f, "{}", val),
|
||||
Value::String(text) => write!(f, "{}", text),
|
||||
fn print_value(&self, val: &Value) {
|
||||
match val {
|
||||
Value::I64(val) => print!("{}", val),
|
||||
Value::String(text) => print!("{}", self.stringstore.lookup(*text).unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@ -3,6 +3,7 @@ pub mod interpreter;
|
||||
pub mod lexer;
|
||||
pub mod parser;
|
||||
pub mod token;
|
||||
pub mod stringstore;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
@ -94,3 +95,4 @@ mod tests {
|
||||
assert_eq!(interpreter.output(), &expected_output);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,28 +1,42 @@
|
||||
use std::iter::Peekable;
|
||||
|
||||
use crate::ast::*;
|
||||
use crate::stringstore::StringStore;
|
||||
use crate::token::Token;
|
||||
|
||||
/// Parse the given tokens into an abstract syntax tree
|
||||
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> BlockScope {
|
||||
let mut parser = Parser::new(tokens);
|
||||
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
|
||||
let parser = Parser::new(tokens);
|
||||
parser.parse()
|
||||
}
|
||||
|
||||
struct Parser<T: Iterator<Item = Token>> {
|
||||
tokens: Peekable<T>,
|
||||
stringstore: StringStore,
|
||||
}
|
||||
|
||||
impl<T: Iterator<Item = Token>> Parser<T> {
|
||||
/// Create a new parser to parse the given Token Stream
|
||||
fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
|
||||
pub fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
|
||||
let tokens = tokens.into_iter().peekable();
|
||||
Self { tokens }
|
||||
let stringstore = StringStore::new();
|
||||
Self {
|
||||
tokens,
|
||||
stringstore,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(mut self) -> Ast {
|
||||
let main = self.parse_scoped_block();
|
||||
Ast {
|
||||
main,
|
||||
stringstore: self.stringstore,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse tokens into an abstract syntax tree. This will continuously parse statements until
|
||||
/// encountering end-of-file or a block end '}' .
|
||||
fn parse(&mut self) -> BlockScope {
|
||||
fn parse_scoped_block(&mut self) -> BlockScope {
|
||||
let mut prog = Vec::new();
|
||||
|
||||
loop {
|
||||
@ -86,7 +100,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
||||
panic!("Error lexing if: Expected '{{'")
|
||||
}
|
||||
|
||||
let body_true = self.parse();
|
||||
let body_true = self.parse_scoped_block();
|
||||
|
||||
if !matches!(self.next(), Token::RBraces) {
|
||||
panic!("Error lexing if: Expected '}}'")
|
||||
@ -101,7 +115,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
||||
panic!("Error lexing if: Expected '{{'")
|
||||
}
|
||||
|
||||
body_false = self.parse();
|
||||
body_false = self.parse_scoped_block();
|
||||
|
||||
if !matches!(self.next(), Token::RBraces) {
|
||||
panic!("Error lexing if: Expected '}}'")
|
||||
@ -128,7 +142,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
||||
|
||||
match self.next() {
|
||||
Token::LBraces => {
|
||||
body = self.parse();
|
||||
body = self.parse_scoped_block();
|
||||
}
|
||||
|
||||
Token::Semicolon => {
|
||||
@ -138,7 +152,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
||||
panic!("Error lexing loop: Expected '{{'")
|
||||
}
|
||||
|
||||
body = self.parse();
|
||||
body = self.parse_scoped_block();
|
||||
}
|
||||
|
||||
_ => panic!("Error lexing loop: Expected ';' or '{{'"),
|
||||
@ -196,9 +210,9 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
||||
Token::I64(val) => Expression::I64(val),
|
||||
|
||||
// Literal String
|
||||
Token::String(text) => Expression::String(text.into()),
|
||||
Token::String(text) => Expression::String(self.stringstore.intern_or_lookup(&text)),
|
||||
|
||||
Token::Ident(name) => Expression::Var(name),
|
||||
Token::Ident(name) => Expression::Var(self.stringstore.intern_or_lookup(&name)),
|
||||
|
||||
// Parentheses grouping
|
||||
Token::LParen => {
|
||||
@ -248,10 +262,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{parse, BinOpType, Expression};
|
||||
use crate::{
|
||||
parser::Statement,
|
||||
token::Token,
|
||||
};
|
||||
use crate::{parser::Statement, token::Token};
|
||||
|
||||
#[test]
|
||||
fn test_parser() {
|
||||
@ -287,6 +298,6 @@ mod tests {
|
||||
let expected = vec![expected];
|
||||
|
||||
let actual = parse(tokens);
|
||||
assert_eq!(expected, actual);
|
||||
assert_eq!(expected, actual.main);
|
||||
}
|
||||
}
|
||||
|
||||
31
src/stringstore.rs
Normal file
31
src/stringstore.rs
Normal file
@ -0,0 +1,31 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Identifier of an interned string.
///
/// A `Sid` is a small `Copy` handle wrapping an index. Two `Sid`s compare equal exactly when
/// their indices are equal. `Hash` is derived alongside `Eq` so the id can be used directly as
/// a key in `HashMap`/`HashSet`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Sid(usize);
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub struct StringStore {
|
||||
strings: Vec<String>,
|
||||
sids: HashMap<String, Sid>,
|
||||
}
|
||||
|
||||
impl StringStore {
|
||||
|
||||
pub fn new() -> Self {
|
||||
Self { strings: Vec::new(), sids: HashMap::new() }
|
||||
}
|
||||
|
||||
pub fn intern_or_lookup(&mut self, text: &str) -> Sid {
|
||||
self.sids.get(text).copied().unwrap_or_else(|| {
|
||||
let sid = Sid(self.strings.len());
|
||||
self.strings.push(text.to_string());
|
||||
self.sids.insert(text.to_string(), sid);
|
||||
sid
|
||||
})
|
||||
}
|
||||
|
||||
pub fn lookup(&self, sid: Sid) -> Option<&String> {
|
||||
self.strings.get(sid.0)
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user