Implement string interning
This commit is contained in:
parent
4dbc3adfd5
commit
d4c6f3d5dc
12
src/ast.rs
12
src/ast.rs
@ -1,4 +1,4 @@
|
|||||||
use std::rc::Rc;
|
use crate::stringstore::{StringStore, Sid};
|
||||||
|
|
||||||
/// Types for binary operators
|
/// Types for binary operators
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
@ -81,9 +81,9 @@ pub enum Expression {
|
|||||||
/// Integer literal (64-bit)
|
/// Integer literal (64-bit)
|
||||||
I64(i64),
|
I64(i64),
|
||||||
/// String literal
|
/// String literal
|
||||||
String(Rc<String>),
|
String(Sid),
|
||||||
/// Variable
|
/// Variable
|
||||||
Var(String),
|
Var(Sid),
|
||||||
/// Binary operation. Consists of type, left hand side and right hand side
|
/// Binary operation. Consists of type, left hand side and right hand side
|
||||||
BinOp(BinOpType, Box<Expression>, Box<Expression>),
|
BinOp(BinOpType, Box<Expression>, Box<Expression>),
|
||||||
/// Unary operation. Consists of type and operand
|
/// Unary operation. Consists of type and operand
|
||||||
@ -120,6 +120,12 @@ pub enum Statement {
|
|||||||
|
|
||||||
pub type BlockScope = Vec<Statement>;
|
pub type BlockScope = Vec<Statement>;
|
||||||
|
|
||||||
|
/// Result of parsing: the program's top-level block together with the string
/// store the parser filled while building it.
#[derive(Clone, Default)]
|
||||||
|
pub struct Ast {
|
||||||
|
/// Store of the strings interned during parsing (string literals and
/// identifier names); `Sid`s in `main` are resolved through it.
pub stringstore: StringStore,
|
||||||
|
/// Statements of the outermost block of the program.
pub main: BlockScope,
|
||||||
|
}
|
||||||
|
|
||||||
impl BinOpType {
|
impl BinOpType {
|
||||||
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
|
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
|
||||||
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
|
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
|
||||||
|
|||||||
@ -1,15 +1,13 @@
|
|||||||
use std::{fmt::Display, rc::Rc};
|
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
ast::{BlockScope, BinOpType, Expression, If, Statement, UnOpType},
|
ast::{BlockScope, BinOpType, Expression, If, Statement, UnOpType},
|
||||||
lexer::lex,
|
lexer::lex,
|
||||||
parser::parse,
|
parser::parse, stringstore::{Sid, StringStore},
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub enum Value {
|
pub enum Value {
|
||||||
I64(i64),
|
I64(i64),
|
||||||
String(Rc<String>),
|
String(Sid),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
@ -17,7 +15,9 @@ pub struct Interpreter {
|
|||||||
capture_output: bool,
|
capture_output: bool,
|
||||||
output: Vec<Value>,
|
output: Vec<Value>,
|
||||||
// Variable table stores the runtime values of variables
|
// Variable table stores the runtime values of variables
|
||||||
vartable: Vec<(String, Value)>,
|
vartable: Vec<(Sid, Value)>,
|
||||||
|
|
||||||
|
stringstore: StringStore,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Interpreter {
|
impl Interpreter {
|
||||||
@ -33,7 +33,7 @@ impl Interpreter {
|
|||||||
&self.output
|
&self.output
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_var(&self, name: &str) -> Option<Value> {
|
fn get_var(&self, name: Sid) -> Option<Value> {
|
||||||
self.vartable
|
self.vartable
|
||||||
.iter()
|
.iter()
|
||||||
.rev()
|
.rev()
|
||||||
@ -41,7 +41,7 @@ impl Interpreter {
|
|||||||
.map(|it| it.1.clone())
|
.map(|it| it.1.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_var_mut(&mut self, name: &str) -> Option<&mut Value> {
|
fn get_var_mut(&mut self, name: Sid) -> Option<&mut Value> {
|
||||||
self.vartable
|
self.vartable
|
||||||
.iter_mut()
|
.iter_mut()
|
||||||
.rev()
|
.rev()
|
||||||
@ -57,13 +57,15 @@ impl Interpreter {
|
|||||||
|
|
||||||
let ast = parse(tokens);
|
let ast = parse(tokens);
|
||||||
if print_ast {
|
if print_ast {
|
||||||
println!("{:#?}", ast);
|
println!("{:#?}", ast.main);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.run(&ast);
|
self.stringstore = ast.stringstore;
|
||||||
|
|
||||||
|
self.run_block(&ast.main);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn run(&mut self, prog: &BlockScope) {
|
pub fn run_block(&mut self, prog: &BlockScope) {
|
||||||
let vartable_len = self.vartable.len();
|
let vartable_len = self.vartable.len();
|
||||||
for stmt in prog {
|
for stmt in prog {
|
||||||
match stmt {
|
match stmt {
|
||||||
@ -78,7 +80,7 @@ impl Interpreter {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.run(&looop.body);
|
self.run_block(&looop.body);
|
||||||
|
|
||||||
if let Some(adv) = &looop.advancement {
|
if let Some(adv) = &looop.advancement {
|
||||||
self.resolve_expr(&adv);
|
self.resolve_expr(&adv);
|
||||||
@ -92,7 +94,7 @@ impl Interpreter {
|
|||||||
if self.capture_output {
|
if self.capture_output {
|
||||||
self.output.push(result)
|
self.output.push(result)
|
||||||
} else {
|
} else {
|
||||||
print!("{}", result);
|
self.print_value(&result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -102,9 +104,9 @@ impl Interpreter {
|
|||||||
body_false,
|
body_false,
|
||||||
}) => {
|
}) => {
|
||||||
if matches!(self.resolve_expr(condition), Value::I64(0)) {
|
if matches!(self.resolve_expr(condition), Value::I64(0)) {
|
||||||
self.run(body_false);
|
self.run_block(body_false);
|
||||||
} else {
|
} else {
|
||||||
self.run(body_true);
|
self.run_block(body_true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -119,14 +121,14 @@ impl Interpreter {
|
|||||||
Expression::String(text) => Value::String(text.clone()),
|
Expression::String(text) => Value::String(text.clone()),
|
||||||
Expression::BinOp(bo, lhs, rhs) => self.resolve_binop(bo, lhs, rhs),
|
Expression::BinOp(bo, lhs, rhs) => self.resolve_binop(bo, lhs, rhs),
|
||||||
Expression::UnOp(uo, operand) => self.resolve_unop(uo, operand),
|
Expression::UnOp(uo, operand) => self.resolve_unop(uo, operand),
|
||||||
Expression::Var(name) => self.resolve_var(name),
|
Expression::Var(name) => self.resolve_var(*name),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_var(&mut self, name: &str) -> Value {
|
fn resolve_var(&mut self, name: Sid) -> Value {
|
||||||
match self.get_var(name) {
|
match self.get_var(name) {
|
||||||
Some(val) => val.clone(),
|
Some(val) => val.clone(),
|
||||||
None => panic!("Variable '{}' used but not declared", name),
|
None => panic!("Variable '{}' used but not declared", self.stringstore.lookup(name).unwrap()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -150,7 +152,7 @@ impl Interpreter {
|
|||||||
return rhs;
|
return rhs;
|
||||||
}
|
}
|
||||||
(BinOpType::Assign, Expression::Var(name)) => {
|
(BinOpType::Assign, Expression::Var(name)) => {
|
||||||
match self.get_var_mut(name) {
|
match self.get_var_mut(*name) {
|
||||||
Some(val) => *val = rhs.clone(),
|
Some(val) => *val = rhs.clone(),
|
||||||
None => panic!("Runtime Error: Trying to assign value to undeclared variable"),
|
None => panic!("Runtime Error: Trying to assign value to undeclared variable"),
|
||||||
}
|
}
|
||||||
@ -187,15 +189,14 @@ impl Interpreter {
|
|||||||
_ => panic!("Value types are not compatible"),
|
_ => panic!("Value types are not compatible"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
impl Display for Value {
|
fn print_value(&self, val: &Value) {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
match val {
|
||||||
match self {
|
Value::I64(val) => print!("{}", val),
|
||||||
Value::I64(val) => write!(f, "{}", val),
|
Value::String(text) => print!("{}", self.stringstore.lookup(*text).unwrap()),
|
||||||
Value::String(text) => write!(f, "{}", text),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
@ -3,6 +3,7 @@ pub mod interpreter;
|
|||||||
pub mod lexer;
|
pub mod lexer;
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
pub mod token;
|
pub mod token;
|
||||||
|
pub mod stringstore;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
@ -94,3 +95,4 @@ mod tests {
|
|||||||
assert_eq!(interpreter.output(), &expected_output);
|
assert_eq!(interpreter.output(), &expected_output);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,28 +1,42 @@
|
|||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
|
||||||
use crate::ast::*;
|
use crate::ast::*;
|
||||||
|
use crate::stringstore::StringStore;
|
||||||
use crate::token::Token;
|
use crate::token::Token;
|
||||||
|
|
||||||
/// Parse the given tokens into an abstract syntax tree
|
/// Parse the given tokens into an abstract syntax tree
|
||||||
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> BlockScope {
|
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
|
||||||
let mut parser = Parser::new(tokens);
|
let parser = Parser::new(tokens);
|
||||||
parser.parse()
|
parser.parse()
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Parser<T: Iterator<Item = Token>> {
|
struct Parser<T: Iterator<Item = Token>> {
|
||||||
tokens: Peekable<T>,
|
tokens: Peekable<T>,
|
||||||
|
stringstore: StringStore,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Iterator<Item = Token>> Parser<T> {
|
impl<T: Iterator<Item = Token>> Parser<T> {
|
||||||
/// Create a new parser to parse the given Token Stream
|
/// Create a new parser to parse the given Token Stream
|
||||||
fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
|
pub fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
|
||||||
let tokens = tokens.into_iter().peekable();
|
let tokens = tokens.into_iter().peekable();
|
||||||
Self { tokens }
|
let stringstore = StringStore::new();
|
||||||
|
Self {
|
||||||
|
tokens,
|
||||||
|
stringstore,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse(mut self) -> Ast {
|
||||||
|
let main = self.parse_scoped_block();
|
||||||
|
Ast {
|
||||||
|
main,
|
||||||
|
stringstore: self.stringstore,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse tokens into an abstract syntax tree. This will continuously parse statements until
|
/// Parse tokens into an abstract syntax tree. This will continuously parse statements until
|
||||||
/// encountering end-of-file or a block end '}' .
|
/// encountering end-of-file or a block end '}' .
|
||||||
fn parse(&mut self) -> BlockScope {
|
fn parse_scoped_block(&mut self) -> BlockScope {
|
||||||
let mut prog = Vec::new();
|
let mut prog = Vec::new();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
@ -86,7 +100,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
panic!("Error lexing if: Expected '{{'")
|
panic!("Error lexing if: Expected '{{'")
|
||||||
}
|
}
|
||||||
|
|
||||||
let body_true = self.parse();
|
let body_true = self.parse_scoped_block();
|
||||||
|
|
||||||
if !matches!(self.next(), Token::RBraces) {
|
if !matches!(self.next(), Token::RBraces) {
|
||||||
panic!("Error lexing if: Expected '}}'")
|
panic!("Error lexing if: Expected '}}'")
|
||||||
@ -101,7 +115,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
panic!("Error lexing if: Expected '{{'")
|
panic!("Error lexing if: Expected '{{'")
|
||||||
}
|
}
|
||||||
|
|
||||||
body_false = self.parse();
|
body_false = self.parse_scoped_block();
|
||||||
|
|
||||||
if !matches!(self.next(), Token::RBraces) {
|
if !matches!(self.next(), Token::RBraces) {
|
||||||
panic!("Error lexing if: Expected '}}'")
|
panic!("Error lexing if: Expected '}}'")
|
||||||
@ -128,7 +142,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
|
|
||||||
match self.next() {
|
match self.next() {
|
||||||
Token::LBraces => {
|
Token::LBraces => {
|
||||||
body = self.parse();
|
body = self.parse_scoped_block();
|
||||||
}
|
}
|
||||||
|
|
||||||
Token::Semicolon => {
|
Token::Semicolon => {
|
||||||
@ -138,7 +152,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
panic!("Error lexing loop: Expected '{{'")
|
panic!("Error lexing loop: Expected '{{'")
|
||||||
}
|
}
|
||||||
|
|
||||||
body = self.parse();
|
body = self.parse_scoped_block();
|
||||||
}
|
}
|
||||||
|
|
||||||
_ => panic!("Error lexing loop: Expected ';' or '{{'"),
|
_ => panic!("Error lexing loop: Expected ';' or '{{'"),
|
||||||
@ -196,9 +210,9 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
Token::I64(val) => Expression::I64(val),
|
Token::I64(val) => Expression::I64(val),
|
||||||
|
|
||||||
// Literal String
|
// Literal String
|
||||||
Token::String(text) => Expression::String(text.into()),
|
Token::String(text) => Expression::String(self.stringstore.intern_or_lookup(&text)),
|
||||||
|
|
||||||
Token::Ident(name) => Expression::Var(name),
|
Token::Ident(name) => Expression::Var(self.stringstore.intern_or_lookup(&name)),
|
||||||
|
|
||||||
// Parentheses grouping
|
// Parentheses grouping
|
||||||
Token::LParen => {
|
Token::LParen => {
|
||||||
@ -248,10 +262,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{parse, BinOpType, Expression};
|
use super::{parse, BinOpType, Expression};
|
||||||
use crate::{
|
use crate::{parser::Statement, token::Token};
|
||||||
parser::Statement,
|
|
||||||
token::Token,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parser() {
|
fn test_parser() {
|
||||||
@ -287,6 +298,6 @@ mod tests {
|
|||||||
let expected = vec![expected];
|
let expected = vec![expected];
|
||||||
|
|
||||||
let actual = parse(tokens);
|
let actual = parse(tokens);
|
||||||
assert_eq!(expected, actual);
|
assert_eq!(expected, actual.main);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
31
src/stringstore.rs
Normal file
31
src/stringstore.rs
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
/// Opaque handle identifying a string held by a [`StringStore`].
///
/// A `Sid` wraps the index of its string inside the store that issued it, so
/// it is only meaningful together with that store. It is `Copy` and `Hash`,
/// which makes it cheap to pass around and usable as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Sid(usize);

/// String interner: assigns one [`Sid`] to each distinct string and allows the
/// text to be recovered from the id later.
#[derive(Debug, Clone, Default)]
pub struct StringStore {
    /// Interned strings; a `Sid` is the index of its string in this vector.
    strings: Vec<String>,
    /// Reverse mapping from string contents to the id assigned to them.
    sids: HashMap<String, Sid>,
}

impl StringStore {
    /// Create an empty store. Equivalent to [`StringStore::default`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Return the id of `text`, interning it first if the store has not seen
    /// it yet. Calling this repeatedly with equal text yields the same `Sid`.
    pub fn intern_or_lookup(&mut self, text: &str) -> Sid {
        if let Some(&known) = self.sids.get(text) {
            return known;
        }

        // The next free index becomes the id of the new string.
        let sid = Sid(self.strings.len());
        // Both containers own their own copy of the text: the vector serves
        // id -> text lookups, the map serves text -> id lookups.
        self.strings.push(text.to_string());
        self.sids.insert(text.to_string(), sid);
        sid
    }

    /// Return the text belonging to `sid`, or `None` if the index wrapped by
    /// `sid` is out of range for this store (e.g. it came from another store).
    pub fn lookup(&self, sid: Sid) -> Option<&String> {
        self.strings.get(sid.0)
    }
}
|
||||||
Loading…
x
Reference in New Issue
Block a user