Add a few more comments

This commit is contained in:
Daniel M 2022-02-11 18:34:46 +01:00
parent 5f720ad7c3
commit 70c9d073f9
9 changed files with 310 additions and 64 deletions

View File

@ -1,80 +1,82 @@
use std::rc::Rc; use std::rc::Rc;
use crate::stringstore::{StringStore, Sid}; use crate::stringstore::{Sid, StringStore};
/// Types for binary operators /// Types for binary operations
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub enum BinOpType { pub enum BinOpType {
/// Addition /// Addition ("+")
Add, Add,
/// Subtraction /// Subtraction ("-")
Sub, Sub,
/// Multiplication /// Multiplication ("*")
Mul, Mul,
/// Divide /// Division ("/")
Div, Div,
/// Modulo /// Modulo / Remainder ("%")
Mod, Mod,
/// Compare Equal /// Compare Equal ("==")
EquEqu, EquEqu,
/// Compare Not Equal /// Compare Not Equal ("!=")
NotEqu, NotEqu,
/// Less than /// Compare Less than ("<")
Less, Less,
/// Less than or Equal /// Compare Less than or Equal ("<=")
LessEqu, LessEqu,
/// Greater than /// Compare Greater than (">")
Greater, Greater,
/// Greater than or Equal /// Compare Greater than or Equal (">=")
GreaterEqu, GreaterEqu,
/// Bitwise OR (inclusive or) /// Bitwise Or ("|")
BOr, BOr,
/// Bitwise And /// Bitwise And ("&")
BAnd, BAnd,
/// Bitwise Xor (exclusive or) /// Bitwise Xor / Exclusive Or ("^")
BXor, BXor,
/// Logical And /// Logical And ("&&")
LAnd, LAnd,
/// Logical Or /// Logical Or ("||")
LOr, LOr,
/// Shift Left /// Bitwise Shift Left ("<<")
Shl, Shl,
/// Shift Right /// Bitwise Shift Right (">>")
Shr, Shr,
/// Assign value to variable /// Assign value to variable ("=")
Assign, Assign,
} }
/// Types for unary operations
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub enum UnOpType { pub enum UnOpType {
/// Unary Negate /// Unary Negation ("-")
Negate, Negate,
/// Bitwise Not /// Bitwise Not / Bitflip ("~")
BNot, BNot,
/// Logical Not /// Logical Not ("!")
LNot, LNot,
} }
/// Ast Node for possible Expression variants
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub enum Expression { pub enum Expression {
/// Integer literal (64-bit) /// Integer literal (64-bit)
@ -82,15 +84,16 @@ pub enum Expression {
/// String literal /// String literal
String(Sid), String(Sid),
/// Array with size /// Array with size as an expression
ArrayLiteral(Box<Expression>), ArrayLiteral(Box<Expression>),
/// Array access with name, stackpos and position as expression
/// Array access with name, stackpos and position
ArrayAccess(Sid, usize, Box<Expression>), ArrayAccess(Sid, usize, Box<Expression>),
/// Function call with name, stackpos and the arguments as a vec of expressions
FunCall(Sid, usize, Vec<Expression>), FunCall(Sid, usize, Vec<Expression>),
/// Variable /// Variable with name and the stackpos from behind. This means that stackpos 0 refers to the
/// last variable on the stack and not the first
Var(Sid, usize), Var(Sid, usize),
/// Binary operation. Consists of type, left hand side and right hand side /// Binary operation. Consists of type, left hand side and right hand side
BinOp(BinOpType, Box<Expression>, Box<Expression>), BinOp(BinOpType, Box<Expression>, Box<Expression>),
@ -98,6 +101,7 @@ pub enum Expression {
UnOp(UnOpType, Box<Expression>), UnOp(UnOpType, Box<Expression>),
} }
/// Ast Node for a loop
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub struct Loop { pub struct Loop {
/// The condition that determines if the loop should continue /// The condition that determines if the loop should continue
@ -108,6 +112,7 @@ pub struct Loop {
pub body: BlockScope, pub body: BlockScope,
} }
/// Ast Node for an if
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub struct If { pub struct If {
/// The condition /// The condition
@ -118,40 +123,65 @@ pub struct If {
pub body_false: BlockScope, pub body_false: BlockScope,
} }
/// Ast Node for a function declaration
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub struct FunDecl { pub struct FunDecl {
/// The function name as StringID, stored in the stringstore
pub name: Sid, pub name: Sid,
/// The absolute position on the function stack where the function is stored
pub fun_stackpos: usize, pub fun_stackpos: usize,
/// The argument names as StringIDs
pub argnames: Vec<Sid>, pub argnames: Vec<Sid>,
/// The function body
pub body: Rc<BlockScope>, pub body: Rc<BlockScope>,
} }
/// Ast Node for a variable declaration
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub struct VarDecl { pub struct VarDecl {
/// The variable name as StringID, stored in the stringstore
pub name: Sid, pub name: Sid,
/// The absolute position on the variable stack where the variable is stored
pub var_stackpos: usize, pub var_stackpos: usize,
/// The right hand side that generates the initial value for the variable
pub rhs: Expression, pub rhs: Expression,
} }
/// Ast Node for the possible Statement variants
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub enum Statement { pub enum Statement {
/// Return from a function with the given result value as an expression
Return(Expression), Return(Expression),
/// Break out of the current loop
Break, Break,
/// End the current loop iteration early and continue with the next loop iteration
Continue, Continue,
/// A variable declaration
Declaration(VarDecl), Declaration(VarDecl),
/// A function declaration
FunDeclare(FunDecl), FunDeclare(FunDecl),
/// A simple expression. This could be a function call or an assignment for example
Expr(Expression), Expr(Expression),
/// A freestanding block scope
Block(BlockScope), Block(BlockScope),
/// A loop
Loop(Loop), Loop(Loop),
/// An if
If(If), If(If),
/// A print statement that will output the value of the given expression to the terminal
Print(Expression), Print(Expression),
} }
/// A number of statements that form a block of code together
pub type BlockScope = Vec<Statement>; pub type BlockScope = Vec<Statement>;
/// A full abstract syntax tree
#[derive(Clone, Default)] #[derive(Clone, Default)]
pub struct Ast { pub struct Ast {
/// The stringstore contains the actual string values which are replaced with StringIDs in the
/// Ast. So this is needed to get the actual strings later
pub stringstore: StringStore, pub stringstore: StringStore,
/// The main (top-level) code given as a number of statements
pub main: BlockScope, pub main: BlockScope,
} }

View File

@ -1,9 +1,14 @@
use crate::ast::{Ast, BlockScope, Expression, If, Loop, Statement, BinOpType, UnOpType, VarDecl}; use crate::ast::{Ast, BlockScope, Expression, If, Loop, Statement, BinOpType, UnOpType, VarDecl};
/// A trait that allows to optimize an abstract syntax tree
pub trait AstOptimizer { pub trait AstOptimizer {
/// Consume an abstract syntax tree and return an ast that has the same functionality but with
/// optional optimizations.
fn optimize(ast: Ast) -> Ast; fn optimize(ast: Ast) -> Ast;
} }
/// A very simple optimizer that applies trivial optimizations like precalculating expressions that
/// have only literals as operands
pub struct SimpleAstOptimizer; pub struct SimpleAstOptimizer;
impl AstOptimizer for SimpleAstOptimizer { impl AstOptimizer for SimpleAstOptimizer {

View File

@ -10,6 +10,7 @@ use crate::{
stringstore::{Sid, StringStore}, stringstore::{Sid, StringStore},
}; };
/// Runtime errors that can occur during execution
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum RuntimeError { pub enum RuntimeError {
#[error("Invalid array Index: {0:?}")] #[error("Invalid array Index: {0:?}")]
@ -37,41 +38,62 @@ pub enum RuntimeError {
InvalidNumberOfArgs(String, usize, usize), InvalidNumberOfArgs(String, usize, usize),
} }
/// Possible variants for the values
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub enum Value { pub enum Value {
/// 64-bit integer value
I64(i64), I64(i64),
/// String value
String(Sid), String(Sid),
/// Array value
Array(Rc<RefCell<Vec<Value>>>), Array(Rc<RefCell<Vec<Value>>>),
/// Void value
Void, Void,
} }
/// The exit type of a block. When a block ends, the exit type specifies why the block ended.
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub enum BlockExit { pub enum BlockExit {
/// Normal exit when the block just ends normally (no returns / breaks / continues / etc.)
Normal, Normal,
/// The block ended through a break statement. This will be propagated up to the next loop
/// and cause it to fully terminate
Break, Break,
/// The block ended through a continue statement. This will be propagated up to the next loop
/// and cause it to start the next iteration
Continue, Continue,
/// The block ended through a return statement. This will propagate up to the next function
/// body end
Return(Value), Return(Value),
} }
#[derive(Default)] #[derive(Default)]
pub struct Interpreter { pub struct Interpreter {
/// Run the SimpleAstOptimizer over the Ast before executing
pub optimize_ast: bool, pub optimize_ast: bool,
/// Print the tokens after lexing
pub print_tokens: bool, pub print_tokens: bool,
/// Print the ast after parsing
pub print_ast: bool, pub print_ast: bool,
/// Capture the output values of print statements instead of printing them to the terminal
pub capture_output: bool, pub capture_output: bool,
/// The stored values that were captured
output: Vec<Value>, output: Vec<Value>,
// Variable table stores the runtime values of variables /// Variable table stores the runtime values of variables as a stack
vartable: Vec<Value>, vartable: Vec<Value>,
/// Function table stores the functions during runtime as a stack
funtable: Vec<FunDecl>, funtable: Vec<FunDecl>,
/// The stringstore contains all strings used throughout the program
stringstore: StringStore, stringstore: StringStore,
} }
impl Interpreter { impl Interpreter {
/// Create a new Interpreter
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
optimize_ast: true, optimize_ast: true,
@ -79,20 +101,28 @@ impl Interpreter {
} }
} }
/// Get the captured output
pub fn output(&self) -> &[Value] { pub fn output(&self) -> &[Value] {
&self.output &self.output
} }
/// Try to retrieve a variable value from the varstack. The idx is the index from the back of
/// the stack. So 0 is the last value, not the first
fn get_var(&self, idx: usize) -> Option<Value> { fn get_var(&self, idx: usize) -> Option<Value> {
self.vartable.get(self.vartable.len() - idx - 1).cloned() self.vartable.get(self.vartable.len() - idx - 1).cloned()
} }
/// Try to retrieve a mutable reference to a variable value from the varstack. The idx is the
/// index from the back of the stack. So 0 is the last value, not the first
fn get_var_mut(&mut self, idx: usize) -> Option<&mut Value> { fn get_var_mut(&mut self, idx: usize) -> Option<&mut Value> {
let idx = self.vartable.len() - idx - 1; let idx = self.vartable.len() - idx - 1;
self.vartable.get_mut(idx) self.vartable.get_mut(idx)
} }
/// Lex, parse and then run the given sourcecode. This will terminate the program when an error
/// occurs and print an appropriate error message.
pub fn run_str(&mut self, code: &str) { pub fn run_str(&mut self, code: &str) {
// Lex the tokens
let tokens = match lex(code) { let tokens = match lex(code) {
Ok(tokens) => tokens, Ok(tokens) => tokens,
Err(e) => nice_panic!("Lexing error: {}", e), Err(e) => nice_panic!("Lexing error: {}", e),
@ -102,18 +132,22 @@ impl Interpreter {
println!("Tokens: {:?}", tokens); println!("Tokens: {:?}", tokens);
} }
// Parse the ast
let ast = match parse(tokens) { let ast = match parse(tokens) {
Ok(ast) => ast, Ok(ast) => ast,
Err(e) => nice_panic!("Parsing error: {}", e), Err(e) => nice_panic!("Parsing error: {}", e),
}; };
// Run the ast
match self.run_ast(ast) { match self.run_ast(ast) {
Ok(_) => (), Ok(_) => (),
Err(e) => nice_panic!("Runtime error: {}", e), Err(e) => nice_panic!("Runtime error: {}", e),
} }
} }
/// Execute the given Ast within the interpreter
pub fn run_ast(&mut self, mut ast: Ast) -> Result<(), RuntimeError> { pub fn run_ast(&mut self, mut ast: Ast) -> Result<(), RuntimeError> {
// Optimize the ast
if self.optimize_ast { if self.optimize_ast {
ast = SimpleAstOptimizer::optimize(ast); ast = SimpleAstOptimizer::optimize(ast);
} }
@ -122,16 +156,22 @@ impl Interpreter {
println!("{:#?}", ast.main); println!("{:#?}", ast.main);
} }
// Take over the stringstore of the given ast
self.stringstore = ast.stringstore; self.stringstore = ast.stringstore;
// Run the top level block (the main)
self.run_block(&ast.main)?; self.run_block(&ast.main)?;
Ok(()) Ok(())
} }
/// Run all statements in the given block
pub fn run_block(&mut self, prog: &BlockScope) -> Result<BlockExit, RuntimeError> { pub fn run_block(&mut self, prog: &BlockScope) -> Result<BlockExit, RuntimeError> {
self.run_block_fp_offset(prog, 0) self.run_block_fp_offset(prog, 0)
} }
/// Same as run_block, but with an additional framepointer offset. This allows to free more
/// values from the stack than normally and can be used when passing arguments inside a
/// function body scope from the outside
pub fn run_block_fp_offset( pub fn run_block_fp_offset(
&mut self, &mut self,
prog: &BlockScope, prog: &BlockScope,
@ -139,7 +179,9 @@ impl Interpreter {
) -> Result<BlockExit, RuntimeError> { ) -> Result<BlockExit, RuntimeError> {
let framepointer = self.vartable.len() - framepointer_offset; let framepointer = self.vartable.len() - framepointer_offset;
for stmt in prog { let mut block_exit = BlockExit::Normal;
'blockloop: for stmt in prog {
match stmt { match stmt {
Statement::Break => return Ok(BlockExit::Break), Statement::Break => return Ok(BlockExit::Break),
Statement::Continue => return Ok(BlockExit::Continue), Statement::Continue => return Ok(BlockExit::Continue),
@ -147,8 +189,8 @@ impl Interpreter {
Statement::Return(expr) => { Statement::Return(expr) => {
let val = self.resolve_expr(expr)?; let val = self.resolve_expr(expr)?;
self.vartable.truncate(framepointer); block_exit = BlockExit::Return(val);
return Ok(BlockExit::Return(val)); break 'blockloop;
} }
Statement::Expr(expr) => { Statement::Expr(expr) => {
@ -163,8 +205,8 @@ impl Interpreter {
Statement::Block(block) => match self.run_block(block)? { Statement::Block(block) => match self.run_block(block)? {
// Propagate return, continue and break // Propagate return, continue and break
be @ (BlockExit::Return(_) | BlockExit::Continue | BlockExit::Break) => { be @ (BlockExit::Return(_) | BlockExit::Continue | BlockExit::Break) => {
self.vartable.truncate(framepointer); block_exit = be;
return Ok(be); break 'blockloop;
} }
_ => (), _ => (),
}, },
@ -172,23 +214,26 @@ impl Interpreter {
Statement::Loop(looop) => { Statement::Loop(looop) => {
// loop runs as long condition != 0 // loop runs as long condition != 0
loop { loop {
// Check the loop condition
if let Some(condition) = &looop.condition { if let Some(condition) = &looop.condition {
if matches!(self.resolve_expr(condition)?, Value::I64(0)) { if matches!(self.resolve_expr(condition)?, Value::I64(0)) {
break; break;
} }
} }
// Run the body
let be = self.run_block(&looop.body)?; let be = self.run_block(&looop.body)?;
match be { match be {
// Propagate return // Propagate return
be @ BlockExit::Return(_) => { be @ BlockExit::Return(_) => {
self.vartable.truncate(framepointer); block_exit = be;
return Ok(be); break 'blockloop;
} }
BlockExit::Break => break, BlockExit::Break => break,
BlockExit::Continue | BlockExit::Normal => (), BlockExit::Continue | BlockExit::Normal => (),
} }
// Run the advancement
if let Some(adv) = &looop.advancement { if let Some(adv) = &looop.advancement {
self.resolve_expr(&adv)?; self.resolve_expr(&adv)?;
} }
@ -210,6 +255,7 @@ impl Interpreter {
body_true, body_true,
body_false, body_false,
}) => { }) => {
// Run the right block depending on the condition's result being 0 or not
let exit = if matches!(self.resolve_expr(condition)?, Value::I64(0)) { let exit = if matches!(self.resolve_expr(condition)?, Value::I64(0)) {
self.run_block(body_false)? self.run_block(body_false)?
} else { } else {
@ -219,8 +265,8 @@ impl Interpreter {
match exit { match exit {
// Propagate return, continue and break // Propagate return, continue and break
be @ (BlockExit::Return(_) | BlockExit::Continue | BlockExit::Break) => { be @ (BlockExit::Return(_) | BlockExit::Continue | BlockExit::Break) => {
self.vartable.truncate(framepointer); block_exit = be;
return Ok(be); break 'blockloop;
} }
_ => (), _ => (),
} }
@ -234,9 +280,10 @@ impl Interpreter {
self.vartable.truncate(framepointer); self.vartable.truncate(framepointer);
Ok(BlockExit::Normal) Ok(block_exit)
} }
/// Execute the given expression to retrieve the resulting value
fn resolve_expr(&mut self, expr: &Expression) -> Result<Value, RuntimeError> { fn resolve_expr(&mut self, expr: &Expression) -> Result<Value, RuntimeError> {
let val = match expr { let val = match expr {
Expression::I64(val) => Value::I64(*val), Expression::I64(val) => Value::I64(*val),
@ -271,6 +318,7 @@ impl Interpreter {
// Function existence has been verified in the parser, so unwrap here shouldn't fail // Function existence has been verified in the parser, so unwrap here shouldn't fail
let expected_num_args = self.funtable.get(*fun_stackpos).unwrap().argnames.len(); let expected_num_args = self.funtable.get(*fun_stackpos).unwrap().argnames.len();
// Check if the number of provided arguments matches the number of expected arguments
if expected_num_args != args_len { if expected_num_args != args_len {
let fun_name = self let fun_name = self
.stringstore .stringstore
@ -284,6 +332,7 @@ impl Interpreter {
)); ));
} }
// Run the function body and return the BlockExit type
match self.run_block_fp_offset( match self.run_block_fp_offset(
&Rc::clone(&self.funtable.get(*fun_stackpos).unwrap().body), &Rc::clone(&self.funtable.get(*fun_stackpos).unwrap().body),
expected_num_args, expected_num_args,
@ -297,17 +346,23 @@ impl Interpreter {
Ok(val) Ok(val)
} }
/// Retrieve the value of a given array at the specified index from the varstack. The name is
/// given as a StringID and is used to reference the variable name in case of an error. The
/// idx is the stackpos where the array variable should be located and the arr_idx is the
/// actual array access index, given as an expression.
fn resolve_array_access( fn resolve_array_access(
&mut self, &mut self,
name: Sid, name: Sid,
idx: usize, idx: usize,
arr_idx: &Expression, arr_idx: &Expression,
) -> Result<Value, RuntimeError> { ) -> Result<Value, RuntimeError> {
// Resolve the array index into a value and check if it is a valid array index
let arr_idx = match self.resolve_expr(arr_idx)? { let arr_idx = match self.resolve_expr(arr_idx)? {
Value::I64(size) if !size.is_negative() => size, Value::I64(size) if !size.is_negative() => size,
val => return Err(RuntimeError::InvalidArrayIndex(val)), val => return Err(RuntimeError::InvalidArrayIndex(val)),
}; };
// Get the array value
let val = match self.get_var(idx) { let val = match self.get_var(idx) {
Some(val) => val, Some(val) => val,
None => { None => {
@ -320,6 +375,7 @@ impl Interpreter {
} }
}; };
// Make sure it is an array
let arr = match val { let arr = match val {
Value::Array(arr) => arr, Value::Array(arr) => arr,
_ => { _ => {
@ -332,12 +388,16 @@ impl Interpreter {
} }
}; };
let arr = arr.borrow_mut(); // Get the value of the requested cell inside the array
let arr = arr.borrow();
arr.get(arr_idx as usize) arr.get(arr_idx as usize)
.cloned() .cloned()
.ok_or(RuntimeError::ArrayOutOfBounds(arr_idx as usize, arr.len())) .ok_or(RuntimeError::ArrayOutOfBounds(arr_idx as usize, arr.len()))
} }
/// Retrieve the value of a given variable from the varstack. The name is given as a StringID
/// and is used to reference the variable name in case of an error. The idx is the stackpos
/// where the variable should be located
fn resolve_var(&mut self, name: Sid, idx: usize) -> Result<Value, RuntimeError> { fn resolve_var(&mut self, name: Sid, idx: usize) -> Result<Value, RuntimeError> {
match self.get_var(idx) { match self.get_var(idx) {
Some(val) => Ok(val), Some(val) => Ok(val),
@ -352,9 +412,12 @@ impl Interpreter {
} }
} }
/// Execute a unary operation and get the resulting value
fn resolve_unop(&mut self, uo: &UnOpType, operand: &Expression) -> Result<Value, RuntimeError> { fn resolve_unop(&mut self, uo: &UnOpType, operand: &Expression) -> Result<Value, RuntimeError> {
// Recursively resolve the operand's expression into an actual value
let operand = self.resolve_expr(operand)?; let operand = self.resolve_expr(operand)?;
// Perform the correct operation, considering the operation and value type
Ok(match (operand, uo) { Ok(match (operand, uo) {
(Value::I64(val), UnOpType::Negate) => Value::I64(-val), (Value::I64(val), UnOpType::Negate) => Value::I64(-val),
(Value::I64(val), UnOpType::BNot) => Value::I64(!val), (Value::I64(val), UnOpType::BNot) => Value::I64(!val),
@ -363,6 +426,7 @@ impl Interpreter {
}) })
} }
/// Execute a binary operation and get the resulting value
fn resolve_binop( fn resolve_binop(
&mut self, &mut self,
bo: &BinOpType, bo: &BinOpType,
@ -371,8 +435,11 @@ impl Interpreter {
) -> Result<Value, RuntimeError> { ) -> Result<Value, RuntimeError> {
let rhs = self.resolve_expr(rhs)?; let rhs = self.resolve_expr(rhs)?;
// Handle assignments separate from the other binary operations
match (&bo, &lhs) { match (&bo, &lhs) {
// Normal variable assignment
(BinOpType::Assign, Expression::Var(name, idx)) => { (BinOpType::Assign, Expression::Var(name, idx)) => {
// Get the variable mutably and assign the right hand side value
match self.get_var_mut(*idx) { match self.get_var_mut(*idx) {
Some(val) => *val = rhs.clone(), Some(val) => *val = rhs.clone(),
None => { None => {
@ -384,14 +451,18 @@ impl Interpreter {
)) ))
} }
} }
return Ok(rhs); return Ok(rhs);
} }
// Array index assignment
(BinOpType::Assign, Expression::ArrayAccess(name, idx, arr_idx)) => { (BinOpType::Assign, Expression::ArrayAccess(name, idx, arr_idx)) => {
// Calculate the array index
let arr_idx = match self.resolve_expr(arr_idx)? { let arr_idx = match self.resolve_expr(arr_idx)? {
Value::I64(size) if !size.is_negative() => size, Value::I64(size) if !size.is_negative() => size,
val => return Err(RuntimeError::InvalidArrayIndex(val)), val => return Err(RuntimeError::InvalidArrayIndex(val)),
}; };
// Get the mutable ref to the array variable
let val = match self.get_var_mut(*idx) { let val = match self.get_var_mut(*idx) {
Some(val) => val, Some(val) => val,
None => { None => {
@ -404,7 +475,9 @@ impl Interpreter {
} }
}; };
// Verify that it actually is an array
match val { match val {
// Assign the right hand side value to the array at the given index
Value::Array(arr) => arr.borrow_mut()[arr_idx as usize] = rhs.clone(), Value::Array(arr) => arr.borrow_mut()[arr_idx as usize] = rhs.clone(),
_ => { _ => {
return Err(RuntimeError::TryingToIndexNonArray( return Err(RuntimeError::TryingToIndexNonArray(
@ -421,8 +494,14 @@ impl Interpreter {
_ => (), _ => (),
} }
// This code is only executed if the binop is not an assignment as the assignments return
// early
// Resolve the left hand side to the value
let lhs = self.resolve_expr(lhs)?; let lhs = self.resolve_expr(lhs)?;
// Perform the appropriate calculations considering the operation type and datatypes of the
// two values
let result = match (lhs, rhs) { let result = match (lhs, rhs) {
(Value::I64(lhs), Value::I64(rhs)) => match bo { (Value::I64(lhs), Value::I64(rhs)) => match bo {
BinOpType::Add => Value::I64(lhs + rhs), BinOpType::Add => Value::I64(lhs + rhs),
@ -456,6 +535,8 @@ impl Interpreter {
Ok(result) Ok(result)
} }
/// Get a string representation of the given value. This uses the interpreter's StringStore to
/// retrieve the text values of Strings
fn value_to_string(&self, val: &Value) -> String { fn value_to_string(&self, val: &Value) -> String {
match val { match val {
Value::I64(val) => format!("{}", val), Value::I64(val) => format!("{}", val),
@ -476,6 +557,8 @@ mod test {
use super::{Interpreter, Value}; use super::{Interpreter, Value};
use crate::ast::{BinOpType, Expression}; use crate::ast::{BinOpType, Expression};
/// Simple test to check if a simple expression is executed properly.
/// Full system tests from lexing to execution can be found in `lib.rs`
#[test] #[test]
fn test_interpreter_expr() { fn test_interpreter_expr() {
// Expression: 1 + 2 * 3 + 4 // Expression: 1 + 2 * 3 + 4

View File

@ -3,6 +3,7 @@ use thiserror::Error;
use crate::{token::Token, T}; use crate::{token::Token, T};
/// Errors that can occur while lexing a given string
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum LexErr { pub enum LexErr {
#[error("Failed to parse '{0}' as i64")] #[error("Failed to parse '{0}' as i64")]
@ -24,8 +25,11 @@ pub fn lex(code: &str) -> Result<Vec<Token>, LexErr> {
lexer.lex() lexer.lex()
} }
/// The lexer is created from a reference to a sourcecode string and is consumed to create a token
/// buffer from that sourcecode.
struct Lexer<'a> { struct Lexer<'a> {
/// The sourcecode text as an iterator over the chars /// The sourcecode text as a peekable iterator over the chars. Peekable allows for look-ahead
/// and the use of the Chars iterator allows to support unicode characters
code: Peekable<Chars<'a>>, code: Peekable<Chars<'a>>,
/// The lexed tokens /// The lexed tokens
tokens: Vec<Token>, tokens: Vec<Token>,
@ -34,6 +38,8 @@ struct Lexer<'a> {
} }
impl<'a> Lexer<'a> { impl<'a> Lexer<'a> {
/// Create a new lexer from the given sourcecode
fn new(code: &'a str) -> Self { fn new(code: &'a str) -> Self {
let code = code.chars().peekable(); let code = code.chars().peekable();
let tokens = Vec::new(); let tokens = Vec::new();
@ -45,14 +51,18 @@ impl<'a> Lexer<'a> {
} }
} }
/// Consume the lexer and try to lex the contained sourcecode into a token buffer
fn lex(mut self) -> Result<Vec<Token>, LexErr> { fn lex(mut self) -> Result<Vec<Token>, LexErr> {
loop { loop {
self.current_char = self.next(); self.current_char = self.next();
// Match on the current and next character. This gives a 1-char look-ahead and
// can be used to directly match 2-char tokens
match (self.current_char, self.peek()) { match (self.current_char, self.peek()) {
// Stop lexing at EOF // Stop lexing at EOF
('\0', _) => break, ('\0', _) => break,
// Skip whitespace // Skip / ignore whitespace
(' ' | '\t' | '\n' | '\r', _) => (), (' ' | '\t' | '\n' | '\r', _) => (),
// Line comment. Consume every char until linefeed (next line) // Line comment. Consume every char until linefeed (next line)
@ -100,9 +110,10 @@ impl<'a> Lexer<'a> {
// Lex multiple characters together as a string // Lex multiple characters together as a string
('"', _) => self.lex_str()?, ('"', _) => self.lex_str()?,
// Lex multiple characters together as identifier // Lex multiple characters together as identifier or keyword
('a'..='z' | 'A'..='Z' | '_', _) => self.lex_identifier()?, ('a'..='z' | 'A'..='Z' | '_', _) => self.lex_identifier()?,
// Any character that was not handled otherwise is invalid
(ch, _) => Err(LexErr::UnexpectedChar(ch))?, (ch, _) => Err(LexErr::UnexpectedChar(ch))?,
} }
} }
@ -132,7 +143,8 @@ impl<'a> Lexer<'a> {
} }
} }
// Try to convert the string representation of the value to i64 // Try to convert the string representation of the value to i64. The error is mapped to
// the appropriate LexErr
let i64val = sval.parse().map_err(|_| LexErr::NumericParse(sval))?; let i64val = sval.parse().map_err(|_| LexErr::NumericParse(sval))?;
self.push_tok(T![i64(i64val)]); self.push_tok(T![i64(i64val)]);
@ -143,24 +155,28 @@ impl<'a> Lexer<'a> {
/// Lex characters as a string until encountering an unescaped closing doublequote char '"'. /// Lex characters as a string until encountering an unescaped closing doublequote char '"'.
/// The successfully lexed string literal token is appended to the stored tokens. /// The successfully lexed string literal token is appended to the stored tokens.
fn lex_str(&mut self) -> Result<(), LexErr> { fn lex_str(&mut self) -> Result<(), LexErr> {
// Opening " was consumed in match // The opening " was consumed in match, so a fresh string can be used
let mut text = String::new(); let mut text = String::new();
// Read all chars until encountering the closing " // Read all chars until encountering the closing "
loop { loop {
match self.peek() { match self.peek() {
// An unescaped doublequote ends the current string
'"' => break, '"' => break,
// If the end of file is reached while still waiting for '"', error out // If the end of file is reached while still waiting for '"', error out
'\0' => Err(LexErr::MissingClosingString)?, '\0' => Err(LexErr::MissingClosingString)?,
_ => match self.next() { _ => match self.next() {
// Backshlash indicates an escaped character // Backslash indicates an escaped character, so consume one more char and
// treat it as the escaped char
'\\' => match self.next() { '\\' => match self.next() {
'n' => text.push('\n'), 'n' => text.push('\n'),
'r' => text.push('\r'), 'r' => text.push('\r'),
't' => text.push('\t'), 't' => text.push('\t'),
'\\' => text.push('\\'), '\\' => text.push('\\'),
'"' => text.push('"'), '"' => text.push('"'),
// If the escaped char is not handled, it is unsupported and an error
ch => Err(LexErr::InvalidStrEscape(ch))?, ch => Err(LexErr::InvalidStrEscape(ch))?,
}, },
// All other characters are simply appended to the string // All other characters are simply appended to the string
@ -219,18 +235,23 @@ impl<'a> Lexer<'a> {
self.tokens.push(token); self.tokens.push(token);
} }
/// Same as `push_tok` but also consumes the next token, removing it from the code iter /// Same as `push_tok` but also consumes the next token, removing it from the code iter. This
/// is useful when lexing double char tokens where the second char has only been peeked.
fn push_tok_consume(&mut self, token: Token) { fn push_tok_consume(&mut self, token: Token) {
self.next(); self.next();
self.tokens.push(token); self.tokens.push(token);
} }
/// Advance to next character and return the removed char /// Advance to next character and return the removed char. When the end of the code is reached,
/// `'\0'` is returned. This is used instead of an Option::None since it allows for much
/// shorter and cleaner code in the main loop. The `'\0'` character would not be valid anyways
fn next(&mut self) -> char { fn next(&mut self) -> char {
self.code.next().unwrap_or('\0') self.code.next().unwrap_or('\0')
} }
/// Get the next character without removing it /// Get the next character without removing it. When the end of the code is reached,
/// `'\0'` is returned. This is used instead of an Option::None since it allows for much
/// shorter and cleaner code in the main loop. The `'\0'` character would not be valid anyways
fn peek(&mut self) -> char { fn peek(&mut self) -> char {
self.code.peek().copied().unwrap_or('\0') self.code.peek().copied().unwrap_or('\0')
} }
@ -240,6 +261,7 @@ impl<'a> Lexer<'a> {
mod tests { mod tests {
use crate::{lexer::lex, T}; use crate::{lexer::lex, T};
/// A general test to check if the lexer actually lexes tokens correctly
#[test] #[test]
fn test_lexer() { fn test_lexer() {
let code = r#"53+1-567_000 * / % | ~ ! < > & ^ ({[]});= <- >= <= let code = r#"53+1-567_000 * / % | ~ ! < > & ^ ({[]});= <- >= <=

View File

@ -7,18 +7,25 @@ pub mod stringstore;
pub mod astoptimizer; pub mod astoptimizer;
pub mod util; pub mod util;
/// A bunch of full program tests using the example code programs as test subjects.
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::interpreter::{Interpreter, Value}; use crate::interpreter::{Interpreter, Value};
use std::fs::read_to_string; use std::fs::read_to_string;
/// Run a nek program with the given filename from the examples directory and assert the
/// captured output with the expected result. This only works if the program just outputs one
/// value as the result
fn run_example_check_single_i64_output(filename: &str, correct_result: i64) { fn run_example_check_single_i64_output(filename: &str, correct_result: i64) {
let mut interpreter = Interpreter::new(); let mut interpreter = Interpreter::new();
// Enable output capturing. This captures all calls to `print`
interpreter.capture_output = true; interpreter.capture_output = true;
// Load and run the given program
let code = read_to_string(format!("examples/{filename}")).unwrap(); let code = read_to_string(format!("examples/{filename}")).unwrap();
interpreter.run_str(&code); interpreter.run_str(&code);
// Compare the captured output with the expected value
let expected_output = [Value::I64(correct_result)]; let expected_output = [Value::I64(correct_result)];
assert_eq!(interpreter.output(), &expected_output); assert_eq!(interpreter.output(), &expected_output);
} }

View File

@ -2,6 +2,8 @@ use std::{env::args, fs, process::exit};
use nek_lang::{interpreter::Interpreter, nice_panic}; use nek_lang::{interpreter::Interpreter, nice_panic};
/// Cli configuration flags and arguments. This could be done with `clap`, but since only so few
/// arguments are supported this seems kind of overkill.
#[derive(Debug, Default)] #[derive(Debug, Default)]
struct CliConfig { struct CliConfig {
print_tokens: bool, print_tokens: bool,
@ -38,6 +40,7 @@ fn main() {
Ok(code) => code, Ok(code) => code,
Err(_) => nice_panic!("Error: Could not read file '{}'", file), Err(_) => nice_panic!("Error: Could not read file '{}'", file),
}; };
// Lex, parse and run the program
interpreter.run_str(&code); interpreter.run_str(&code);
} else { } else {
println!("Error: No file given\n"); println!("Error: No file given\n");

View File

@ -8,24 +8,34 @@ use crate::{
T, T,
}; };
/// Errors that can occur while parsing
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum ParseErr { pub enum ParseErr {
#[error("Unexpected Token \"{0:?}\", expected \"{1}\"")] #[error("Unexpected Token \"{0:?}\", expected \"{1}\"")]
UnexpectedToken(Token, String), UnexpectedToken(Token, String),
#[error("Left hand side of declaration is not a variable")] #[error("Left hand side of declaration is not a variable")]
DeclarationOfNonVar, DeclarationOfNonVar,
#[error("Use of undefined variable \"{0}\"")] #[error("Use of undefined variable \"{0}\"")]
UseOfUndeclaredVar(String), UseOfUndeclaredVar(String),
#[error("Use of undefined function \"{0}\"")] #[error("Use of undefined function \"{0}\"")]
UseOfUndeclaredFun(String), UseOfUndeclaredFun(String),
#[error("Redeclation of function \"{0}\"")] #[error("Redeclation of function \"{0}\"")]
RedeclarationFun(String), RedeclarationFun(String),
#[error("Function not declared at top level \"{0}\"")] #[error("Function not declared at top level \"{0}\"")]
FunctionOnNonTopLevel(String), FunctionOnNonTopLevel(String),
} }
/// A result that can either be Ok, or a ParseErr
type ResPE<T> = Result<T, ParseErr>; type ResPE<T> = Result<T, ParseErr>;
/// This macro can be used to quickly and easily assert if the next token is matching the expected
/// token and return an appropriate error if not. Since this is intended to be used inside the
/// parser, the first argument should always be `self`.
macro_rules! validate_next { macro_rules! validate_next {
($self:ident, $expected_tok:pat, $expected_str:expr) => { ($self:ident, $expected_tok:pat, $expected_str:expr) => {
match $self.next() { match $self.next() {
@ -41,6 +51,7 @@ pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A
parser.parse() parser.parse()
} }
/// A parser that takes in a Token Stream and can create a full abstract syntax tree from it.
struct Parser<T: Iterator<Item = Token>> { struct Parser<T: Iterator<Item = Token>> {
tokens: PutBackIter<T>, tokens: PutBackIter<T>,
string_store: StringStore, string_store: StringStore,
@ -65,6 +76,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
} }
} }
/// Consume the parser and try to create the abstract syntax tree from the token stream
pub fn parse(mut self) -> ResPE<Ast> { pub fn parse(mut self) -> ResPE<Ast> {
let main = self.parse_scoped_block()?; let main = self.parse_scoped_block()?;
Ok(Ast { Ok(Ast {
@ -73,25 +85,32 @@ impl<T: Iterator<Item = Token>> Parser<T> {
}) })
} }
/// Parse a series of statements together as a BlockScope. This will continuously parse
/// statements until encountering end-of-file or a block end '}' .
fn parse_scoped_block(&mut self) -> ResPE<BlockScope> { fn parse_scoped_block(&mut self) -> ResPE<BlockScope> {
self.parse_scoped_block_fp_offset(0) self.parse_scoped_block_fp_offset(0)
} }
/// Parse tokens into an abstract syntax tree. This will continuously parse statements until /// Same as parse_scoped_block, but an offset to the framepointer can be specified to allow
/// encountering end-of-file or a block end '}' . /// for easily passing variables into scopes from the outside. This is used when parsing
fn parse_scoped_block_fp_offset(&mut self, framepoint_offset: usize) -> ResPE<BlockScope> { /// function calls
fn parse_scoped_block_fp_offset(&mut self, framepointer_offset: usize) -> ResPE<BlockScope> {
self.nesting_level += 1; self.nesting_level += 1;
let framepointer = self.var_stack.len() - framepoint_offset; let framepointer = self.var_stack.len() - framepointer_offset;
let mut prog = Vec::new(); let mut prog = Vec::new();
loop { loop {
match self.peek() { match self.peek() {
// Just a semicolon is an empty statement. So just consume it
T![;] => { T![;] => {
self.next(); self.next();
} }
// '}' end the current block and EoF ends everything, as the end of the tokenstream
// is reached
T![EoF] | T!['}'] => break, T![EoF] | T!['}'] => break,
// Create a new scoped block
T!['{'] => { T!['{'] => {
self.next(); self.next();
prog.push(Statement::Block(self.parse_scoped_block()?)); prog.push(Statement::Block(self.parse_scoped_block()?));
@ -99,49 +118,57 @@ impl<T: Iterator<Item = Token>> Parser<T> {
validate_next!(self, T!['}'], "}"); validate_next!(self, T!['}'], "}");
} }
// By default try to lex a statement // By default try to lex statements
_ => prog.push(self.parse_stmt()?), _ => prog.push(self.parse_stmt()?),
} }
} }
// Reset the stack to where it was before entering the scope
self.var_stack.truncate(framepointer); self.var_stack.truncate(framepointer);
self.nesting_level -= 1; self.nesting_level -= 1;
Ok(prog) Ok(prog)
} }
/// Parse a single statement from the tokens. /// Parse a single statement from the tokens
fn parse_stmt(&mut self) -> ResPE<Statement> { fn parse_stmt(&mut self) -> ResPE<Statement> {
let stmt = match self.peek() { let stmt = match self.peek() {
// Break statement
T![break] => { T![break] => {
self.next(); self.next();
// After the statement, there must be a semicolon
validate_next!(self, T![;], ";"); validate_next!(self, T![;], ";");
Statement::Break Statement::Break
} }
// Continue statement
T![continue] => { T![continue] => {
self.next(); self.next();
// After the statement, there must be a semicolon
validate_next!(self, T![;], ";"); validate_next!(self, T![;], ";");
Statement::Continue Statement::Continue
} }
// Loop statement
T![loop] => Statement::Loop(self.parse_loop()?), T![loop] => Statement::Loop(self.parse_loop()?),
// Print statement
T![print] => { T![print] => {
self.next(); self.next();
let expr = self.parse_expr()?; let expr = self.parse_expr()?;
// After a statement, there must be a semicolon // After the statement, there must be a semicolon
validate_next!(self, T![;], ";"); validate_next!(self, T![;], ";");
Statement::Print(expr) Statement::Print(expr)
} }
// Return statement
T![return] => { T![return] => {
self.next(); self.next();
let stmt = Statement::Return(self.parse_expr()?); let stmt = Statement::Return(self.parse_expr()?);
@ -152,23 +179,29 @@ impl<T: Iterator<Item = Token>> Parser<T> {
stmt stmt
} }
// If statement
T![if] => Statement::If(self.parse_if()?), T![if] => Statement::If(self.parse_if()?),
// Function definition statement
T![fun] => { T![fun] => {
self.next(); self.next();
// Expect an identifier as the function name
let fun_name = match self.next() { let fun_name = match self.next() {
T![ident(fun_name)] => fun_name, T![ident(fun_name)] => fun_name,
tok => return Err(ParseErr::UnexpectedToken(tok, "<ident>".to_string())), tok => return Err(ParseErr::UnexpectedToken(tok, "<ident>".to_string())),
}; };
// Only allow function definitions on the top level
if self.nesting_level > 1 { if self.nesting_level > 1 {
return Err(ParseErr::FunctionOnNonTopLevel(fun_name)); return Err(ParseErr::FunctionOnNonTopLevel(fun_name));
} }
// Intern the function name
let fun_name = self.string_store.intern_or_lookup(&fun_name); let fun_name = self.string_store.intern_or_lookup(&fun_name);
// Check if the function name already exists
if self.fun_stack.contains(&fun_name) { if self.fun_stack.contains(&fun_name) {
return Err(ParseErr::RedeclarationFun( return Err(ParseErr::RedeclarationFun(
self.string_store self.string_store
@ -178,19 +211,24 @@ impl<T: Iterator<Item = Token>> Parser<T> {
)); ));
} }
// Put the function name on the function stack for precalculating the stack
// positions
let fun_stackpos = self.fun_stack.len(); let fun_stackpos = self.fun_stack.len();
self.fun_stack.push(fun_name); self.fun_stack.push(fun_name);
let mut arg_names = Vec::new(); let mut arg_names = Vec::new();
validate_next!(self, T!['('], "("); validate_next!(self, T!['('], "(");
// Parse the optional arguments inside the parentheses
while matches!(self.peek(), T![ident(_)]) { while matches!(self.peek(), T![ident(_)]) {
let var_name = match self.next() { let var_name = match self.next() {
T![ident(var_name)] => var_name, T![ident(var_name)] => var_name,
_ => unreachable!(), _ => unreachable!(),
}; };
// Intern argument names
let var_name = self.string_store.intern_or_lookup(&var_name); let var_name = self.string_store.intern_or_lookup(&var_name);
arg_names.push(var_name); arg_names.push(var_name);
@ -221,10 +259,13 @@ impl<T: Iterator<Item = Token>> Parser<T> {
}) })
} }
// Either a variable declaration statement or an expression statement
_ => { _ => {
// To decide if it is a declaration or an expression, a lookahead is needed
let first = self.next(); let first = self.next();
let stmt = match (first, self.peek()) { let stmt = match (first, self.peek()) {
// Identifier and "<-" is a declaration
(T![ident(name)], T![<-]) => { (T![ident(name)], T![<-]) => {
self.next(); self.next();
@ -240,7 +281,9 @@ impl<T: Iterator<Item = Token>> Parser<T> {
rhs, rhs,
}) })
} }
// Anything else must be an expression
(first, _) => { (first, _) => {
// Put the first token back in order for the parse_expr to see it
self.putback(first); self.putback(first);
Statement::Expr(self.parse_expr()?) Statement::Expr(self.parse_expr()?)
} }
@ -269,6 +312,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
let mut body_false = BlockScope::default(); let mut body_false = BlockScope::default();
// Optionally parse the else part
if self.peek() == &T![else] { if self.peek() == &T![else] {
self.next(); self.next();
@ -293,9 +337,11 @@ impl<T: Iterator<Item = Token>> Parser<T> {
let mut condition = None; let mut condition = None;
let mut advancement = None; let mut advancement = None;
// Check if the optional condition is present
if !matches!(self.peek(), T!['{']) { if !matches!(self.peek(), T!['{']) {
condition = Some(self.parse_expr()?); condition = Some(self.parse_expr()?);
// Check if the optional advancement is present
if matches!(self.peek(), T![;]) { if matches!(self.peek(), T![;]) {
self.next(); self.next();
advancement = Some(self.parse_expr()?); advancement = Some(self.parse_expr()?);
@ -321,7 +367,9 @@ impl<T: Iterator<Item = Token>> Parser<T> {
self.parse_expr_precedence(lhs, 0) self.parse_expr_precedence(lhs, 0)
} }
/// Parse binary expressions with a precedence equal to or higher than min_prec /// Parse binary expressions with a precedence equal to or higher than min_prec.
/// This uses the precedence climbing method for dealing with the operator precedences:
/// https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method
fn parse_expr_precedence(&mut self, mut lhs: Expression, min_prec: u8) -> ResPE<Expression> { fn parse_expr_precedence(&mut self, mut lhs: Expression, min_prec: u8) -> ResPE<Expression> {
while let Some(binop) = &self.peek().try_to_binop() { while let Some(binop) = &self.peek().try_to_binop() {
// Stop if the next operator has a lower binding power // Stop if the next operator has a lower binding power
@ -349,7 +397,8 @@ impl<T: Iterator<Item = Token>> Parser<T> {
Ok(lhs) Ok(lhs)
} }
/// Parse a primary expression (for now only number) /// Parse a primary expression. A primary can be a literal value, variable, function call,
/// array indexing, parentheses grouping or a unary operation
fn parse_primary(&mut self) -> ResPE<Expression> { fn parse_primary(&mut self) -> ResPE<Expression> {
let primary = match self.next() { let primary = match self.next() {
// Literal i64 // Literal i64
@ -370,6 +419,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
// Array access, aka indexing. An ident followed by square brackets containing the // Array access, aka indexing. An ident followed by square brackets containing the
// index as an expression // index as an expression
T![ident(name)] if self.peek() == &T!['['] => { T![ident(name)] if self.peek() == &T!['['] => {
// Get the stack position of the array variable
let sid = self.string_store.intern_or_lookup(&name); let sid = self.string_store.intern_or_lookup(&name);
let stackpos = self.get_stackpos(sid)?; let stackpos = self.get_stackpos(sid)?;
@ -382,6 +432,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
Expression::ArrayAccess(sid, stackpos, index.into()) Expression::ArrayAccess(sid, stackpos, index.into())
} }
// Identifier followed by parenthesis is a function call
T![ident(name)] if self.peek() == &T!['('] => { T![ident(name)] if self.peek() == &T!['('] => {
// Skip the opening parenthesis // Skip the opening parenthesis
self.next(); self.next();
@ -390,6 +441,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
let mut args = Vec::new(); let mut args = Vec::new();
// Parse the arguments as expressions
while !matches!(self.peek(), T![')']) { while !matches!(self.peek(), T![')']) {
let arg = self.parse_expr()?; let arg = self.parse_expr()?;
args.push(arg); args.push(arg);
@ -402,19 +454,24 @@ impl<T: Iterator<Item = Token>> Parser<T> {
validate_next!(self, T![')'], ")"); validate_next!(self, T![')'], ")");
// Find the function stack position
let fun_stackpos = self.get_fun_stackpos(sid)?; let fun_stackpos = self.get_fun_stackpos(sid)?;
Expression::FunCall(sid, fun_stackpos, args) Expression::FunCall(sid, fun_stackpos, args)
} }
// Just an identifier is a variable
T![ident(name)] => { T![ident(name)] => {
// Find the variable stack position
let sid = self.string_store.intern_or_lookup(&name); let sid = self.string_store.intern_or_lookup(&name);
let stackpos = self.get_stackpos(sid)?; let stackpos = self.get_stackpos(sid)?;
Expression::Var(sid, stackpos) Expression::Var(sid, stackpos)
} }
// Parentheses grouping // Parentheses grouping
T!['('] => { T!['('] => {
// Any other expression can be contained in between the parentheses
let inner_expr = self.parse_expr()?; let inner_expr = self.parse_expr()?;
// Verify that there is a closing parenthesis // Verify that there is a closing parenthesis
@ -425,7 +482,10 @@ impl<T: Iterator<Item = Token>> Parser<T> {
// Unary operations or invalid token // Unary operations or invalid token
tok => match tok.try_to_unop() { tok => match tok.try_to_unop() {
// If the token is a valid unary operation, parse it as such
Some(uot) => Expression::UnOp(uot, self.parse_primary()?.into()), Some(uot) => Expression::UnOp(uot, self.parse_primary()?.into()),
// Otherwise it's an unexpected token
None => return Err(ParseErr::UnexpectedToken(tok, "primary".to_string())), None => return Err(ParseErr::UnexpectedToken(tok, "primary".to_string())),
}, },
}; };
@ -433,6 +493,8 @@ impl<T: Iterator<Item = Token>> Parser<T> {
Ok(primary) Ok(primary)
} }
/// Try to get the position of a variable on the variable stack. This is needed to precalculate
/// the stackpositions in order to save time when executing
fn get_stackpos(&self, varid: Sid) -> ResPE<usize> { fn get_stackpos(&self, varid: Sid) -> ResPE<usize> {
self.var_stack self.var_stack
.iter() .iter()
@ -447,6 +509,8 @@ impl<T: Iterator<Item = Token>> Parser<T> {
)) ))
} }
/// Try to get the position of a function on the function stack. This is needed to precalculate
/// the stackpositions in order to save time when executing
fn get_fun_stackpos(&self, varid: Sid) -> ResPE<usize> { fn get_fun_stackpos(&self, varid: Sid) -> ResPE<usize> {
self.fun_stack self.fun_stack
.iter() .iter()
@ -461,16 +525,19 @@ impl<T: Iterator<Item = Token>> Parser<T> {
)) ))
} }
/// Get the next Token without removing it /// Get the next Token without removing it. If there are no more tokens left, the EoF token is
/// returned. This follows the same reasoning as in the Lexer
fn peek(&mut self) -> &Token { fn peek(&mut self) -> &Token {
self.tokens.peek().unwrap_or(&T![EoF]) self.tokens.peek().unwrap_or(&T![EoF])
} }
/// Put a single token back into the token stream
fn putback(&mut self, tok: Token) { fn putback(&mut self, tok: Token) {
self.tokens.putback(tok); self.tokens.putback(tok);
} }
/// Advance to next Token and return the removed Token /// Advance to next Token and return the removed Token. If there are no more tokens left, the
/// EoF token is returned. This follows the same reasoning as in the Lexer
fn next(&mut self) -> Token { fn next(&mut self) -> Token {
self.tokens.next().unwrap_or(T![EoF]) self.tokens.next().unwrap_or(T![EoF])
} }
@ -484,6 +551,7 @@ mod tests {
T, T,
}; };
/// A very simple test to check if the parser correctly parses a simple expression
#[test] #[test]
fn test_parser() { fn test_parser() {
// Expression: 1 + 2 * 3 - 4 // Expression: 1 + 2 * 3 - 4

View File

@ -1,20 +1,35 @@
use std::collections::HashMap; use std::collections::HashMap;
/// A StringID that identifies a String inside the stringstore. This is only valid for the
/// StringStore that created the ID. These StringIDs can be trivially and cheaply copied
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Sid(usize); pub struct Sid(usize);
/// A Datastructure that stores strings, handing out StringIDs that can be used to retrieve the
/// real strings at a later point. This is called interning.
#[derive(Clone, Default)] #[derive(Clone, Default)]
pub struct StringStore { pub struct StringStore {
/// The actual strings that are stored in the StringStore. The StringIDs match the index of the
/// string inside of this strings vector
strings: Vec<String>, strings: Vec<String>,
/// A Hashmap that allows to match already interned Strings to their StringID. This allows for
/// deduplication since the same string won't be stored twice
sids: HashMap<String, Sid>, sids: HashMap<String, Sid>,
} }
impl StringStore { impl StringStore {
/// Create a new empty StringStore
pub fn new() -> Self { pub fn new() -> Self {
Self { strings: Vec::new(), sids: HashMap::new() } Self { strings: Vec::new(), sids: HashMap::new() }
} }
/// Put the given string into the StringStore and get a StringID in return. If the string is
/// not yet stored, it will be after this.
///
/// Note: The generated StringIDs are only valid for the StringStore that created them. Using
/// the IDs with another StringStore is a logic error. A later lookup might return wrong
/// Strings or None.
pub fn intern_or_lookup(&mut self, text: &str) -> Sid { pub fn intern_or_lookup(&mut self, text: &str) -> Sid {
self.sids.get(text).copied().unwrap_or_else(|| { self.sids.get(text).copied().unwrap_or_else(|| {
let sid = Sid(self.strings.len()); let sid = Sid(self.strings.len());
@ -24,6 +39,11 @@ impl StringStore {
}) })
} }
/// Lookup and retrieve a string by the StringID. If the String is not found, None is returned.
///
/// Note: The generated StringIDs are only valid for the StringStore that created them. Using
/// the IDs with another StringStore is a logic error. The lookup might return wrong
/// Strings or None.
pub fn lookup(&self, sid: Sid) -> Option<&String> { pub fn lookup(&self, sid: Sid) -> Option<&String> {
self.strings.get(sid.0) self.strings.get(sid.0)
} }

View File

@ -64,6 +64,7 @@ pub enum Combo {
LessThanMinus, LessThanMinus,
} }
/// Tokens are a group of one or more sourcecode characters that have a meaning together
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum Token { pub enum Token {
/// Literal value token /// Literal value token
@ -72,7 +73,7 @@ pub enum Token {
/// Keyword token /// Keyword token
Keyword(Keyword), Keyword(Keyword),
/// Identifier (name for variables, functions, ...) /// Identifier token (names for variables, functions, ...)
Ident(String), Ident(String),
/// Combined tokens consisting of multiple characters /// Combined tokens consisting of multiple characters
@ -87,7 +88,8 @@ pub enum Token {
/// Semicolon (";") /// Semicolon (";")
Semicolon, Semicolon,
/// End of file /// End of file (This is not generated by the lexer, but the parser uses this to find the
/// end of the token stream)
EoF, EoF,
/// Left Bracket ("[") /// Left Bracket ("[")
@ -182,6 +184,8 @@ impl Token {
}) })
} }
/// If the token can be used as a unary operation type, get the matching UnOpType. Otherwise
/// return None
pub fn try_to_unop(&self) -> Option<UnOpType> { pub fn try_to_unop(&self) -> Option<UnOpType> {
Some(match self { Some(match self {
T![-] => UnOpType::Negate, T![-] => UnOpType::Negate,
@ -193,7 +197,11 @@ impl Token {
} }
} }
/// Macro to quickly create a token of the specified kind /// Macro to quickly create a token of the specified kind. As this is implemented as a macro, it
/// can be used anywhere including in patterns.
///
/// An implementation should exist for each token, so that there is no need to ever write out the
/// long token definitions.
#[macro_export] #[macro_export]
macro_rules! T { macro_rules! T {
// Keywords // Keywords