Add a few more comments
This commit is contained in:
parent
5f720ad7c3
commit
70c9d073f9
86
src/ast.rs
86
src/ast.rs
@ -1,80 +1,82 @@
|
|||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
||||||
use crate::stringstore::{StringStore, Sid};
|
use crate::stringstore::{Sid, StringStore};
|
||||||
|
|
||||||
/// Types for binary operators
|
/// Types for binary operations
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub enum BinOpType {
|
pub enum BinOpType {
|
||||||
/// Addition
|
/// Addition ("+")
|
||||||
Add,
|
Add,
|
||||||
|
|
||||||
/// Subtraction
|
/// Subtraction ("-")
|
||||||
Sub,
|
Sub,
|
||||||
|
|
||||||
/// Multiplication
|
/// Multiplication ("*")
|
||||||
Mul,
|
Mul,
|
||||||
|
|
||||||
/// Divide
|
/// Division ("/")
|
||||||
Div,
|
Div,
|
||||||
|
|
||||||
/// Modulo
|
/// Modulo / Remainder ("%")
|
||||||
Mod,
|
Mod,
|
||||||
|
|
||||||
/// Compare Equal
|
/// Compare Equal ("==")
|
||||||
EquEqu,
|
EquEqu,
|
||||||
|
|
||||||
/// Compare Not Equal
|
/// Compare Not Equal ("!=")
|
||||||
NotEqu,
|
NotEqu,
|
||||||
|
|
||||||
/// Less than
|
/// Compare Less than ("<")
|
||||||
Less,
|
Less,
|
||||||
|
|
||||||
/// Less than or Equal
|
/// Compare Less than or Equal ("<=")
|
||||||
LessEqu,
|
LessEqu,
|
||||||
|
|
||||||
/// Greater than
|
/// Compare Greater than (">")
|
||||||
Greater,
|
Greater,
|
||||||
|
|
||||||
/// Greater than or Equal
|
/// Compare Greater than or Equal (">=")
|
||||||
GreaterEqu,
|
GreaterEqu,
|
||||||
|
|
||||||
/// Bitwise OR (inclusive or)
|
/// Bitwise Or ("|")
|
||||||
BOr,
|
BOr,
|
||||||
|
|
||||||
/// Bitwise And
|
/// Bitwise And ("&")
|
||||||
BAnd,
|
BAnd,
|
||||||
|
|
||||||
/// Bitwise Xor (exclusive or)
|
/// Bitwise Xor / Exclusive Or ("^")
|
||||||
BXor,
|
BXor,
|
||||||
|
|
||||||
/// Logical And
|
/// Logical And ("&&")
|
||||||
LAnd,
|
LAnd,
|
||||||
|
|
||||||
/// Logical Or
|
/// Logical Or ("||")
|
||||||
LOr,
|
LOr,
|
||||||
|
|
||||||
/// Shift Left
|
/// Bitwise Shift Left ("<<")
|
||||||
Shl,
|
Shl,
|
||||||
|
|
||||||
/// Shift Right
|
/// Bitwise Shift Right (">>")
|
||||||
Shr,
|
Shr,
|
||||||
|
|
||||||
/// Assign value to variable
|
/// Assign value to variable ("=")
|
||||||
Assign,
|
Assign,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Types for unary operations
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub enum UnOpType {
|
pub enum UnOpType {
|
||||||
/// Unary Negate
|
/// Unary Negation ("-")
|
||||||
Negate,
|
Negate,
|
||||||
|
|
||||||
/// Bitwise Not
|
/// Bitwise Not / Bitflip ("~")
|
||||||
BNot,
|
BNot,
|
||||||
|
|
||||||
/// Logical Not
|
/// Logical Not ("!")
|
||||||
LNot,
|
LNot,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Ast Node for possible Expression variants
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub enum Expression {
|
pub enum Expression {
|
||||||
/// Integer literal (64-bit)
|
/// Integer literal (64-bit)
|
||||||
@ -82,15 +84,16 @@ pub enum Expression {
|
|||||||
/// String literal
|
/// String literal
|
||||||
String(Sid),
|
String(Sid),
|
||||||
|
|
||||||
/// Array with size
|
/// Array with size as an expression
|
||||||
ArrayLiteral(Box<Expression>),
|
ArrayLiteral(Box<Expression>),
|
||||||
|
/// Array access with name, stackpos and position as expression
|
||||||
/// Array access with name, stackpos and position
|
|
||||||
ArrayAccess(Sid, usize, Box<Expression>),
|
ArrayAccess(Sid, usize, Box<Expression>),
|
||||||
|
|
||||||
|
/// Function call with name, stackpos and the arguments as a vec of expressions
|
||||||
FunCall(Sid, usize, Vec<Expression>),
|
FunCall(Sid, usize, Vec<Expression>),
|
||||||
|
|
||||||
/// Variable
|
/// Variable with name and the stackpos from behind. This means that stackpos 0 refers to the
|
||||||
|
/// last variable on the stack and not the first
|
||||||
Var(Sid, usize),
|
Var(Sid, usize),
|
||||||
/// Binary operation. Consists of type, left hand side and right hand side
|
/// Binary operation. Consists of type, left hand side and right hand side
|
||||||
BinOp(BinOpType, Box<Expression>, Box<Expression>),
|
BinOp(BinOpType, Box<Expression>, Box<Expression>),
|
||||||
@ -98,6 +101,7 @@ pub enum Expression {
|
|||||||
UnOp(UnOpType, Box<Expression>),
|
UnOp(UnOpType, Box<Expression>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Ast Node for a loop
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub struct Loop {
|
pub struct Loop {
|
||||||
/// The condition that determines if the loop should continue
|
/// The condition that determines if the loop should continue
|
||||||
@ -108,6 +112,7 @@ pub struct Loop {
|
|||||||
pub body: BlockScope,
|
pub body: BlockScope,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Ast Node for an if
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub struct If {
|
pub struct If {
|
||||||
/// The condition
|
/// The condition
|
||||||
@ -118,40 +123,65 @@ pub struct If {
|
|||||||
pub body_false: BlockScope,
|
pub body_false: BlockScope,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Ast Node for a function declaration
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub struct FunDecl {
|
pub struct FunDecl {
|
||||||
|
/// The function name as StringID, stored in the stringstore
|
||||||
pub name: Sid,
|
pub name: Sid,
|
||||||
|
/// The absolute position on the function stack where the function is stored
|
||||||
pub fun_stackpos: usize,
|
pub fun_stackpos: usize,
|
||||||
|
/// The argument names as StringIDs
|
||||||
pub argnames: Vec<Sid>,
|
pub argnames: Vec<Sid>,
|
||||||
|
/// The function body
|
||||||
pub body: Rc<BlockScope>,
|
pub body: Rc<BlockScope>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Ast Node for a variable declaration
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub struct VarDecl {
|
pub struct VarDecl {
|
||||||
|
/// The variable name as StringID, stored in the stringstore
|
||||||
pub name: Sid,
|
pub name: Sid,
|
||||||
|
/// The absolute position on the variable stack where the variable is stored
|
||||||
pub var_stackpos: usize,
|
pub var_stackpos: usize,
|
||||||
|
/// The right hand side that generates the initial value for the variable
|
||||||
pub rhs: Expression,
|
pub rhs: Expression,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Ast Node for the possible Statement variants
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub enum Statement {
|
pub enum Statement {
|
||||||
|
/// Return from a function with the given result value as an expression
|
||||||
Return(Expression),
|
Return(Expression),
|
||||||
|
/// Break out of the current loop
|
||||||
Break,
|
Break,
|
||||||
|
/// End the current loop iteration early and continue with the next loop iteration
|
||||||
Continue,
|
Continue,
|
||||||
|
/// A variable declaration
|
||||||
Declaration(VarDecl),
|
Declaration(VarDecl),
|
||||||
|
/// A function declaration
|
||||||
FunDeclare(FunDecl),
|
FunDeclare(FunDecl),
|
||||||
|
/// A simple expression. This could be a function call or an assignment for example
|
||||||
Expr(Expression),
|
Expr(Expression),
|
||||||
|
/// A freestanding block scope
|
||||||
Block(BlockScope),
|
Block(BlockScope),
|
||||||
|
/// A loop
|
||||||
Loop(Loop),
|
Loop(Loop),
|
||||||
|
/// An if
|
||||||
If(If),
|
If(If),
|
||||||
|
/// A print statement that will output the value of the given expression to the terminal
|
||||||
Print(Expression),
|
Print(Expression),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A number of statements that form a block of code together
|
||||||
pub type BlockScope = Vec<Statement>;
|
pub type BlockScope = Vec<Statement>;
|
||||||
|
|
||||||
|
/// A full abstract syntax tree
|
||||||
#[derive(Clone, Default)]
|
#[derive(Clone, Default)]
|
||||||
pub struct Ast {
|
pub struct Ast {
|
||||||
|
/// The stringstore contains the actual string values which are replaced with StringIDs in the
|
||||||
|
/// Ast. So this is needed to get the actual strings later
|
||||||
pub stringstore: StringStore,
|
pub stringstore: StringStore,
|
||||||
|
/// The main (top-level) code given as a number of statements
|
||||||
pub main: BlockScope,
|
pub main: BlockScope,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,9 +1,14 @@
|
|||||||
use crate::ast::{Ast, BlockScope, Expression, If, Loop, Statement, BinOpType, UnOpType, VarDecl};
|
use crate::ast::{Ast, BlockScope, Expression, If, Loop, Statement, BinOpType, UnOpType, VarDecl};
|
||||||
|
|
||||||
|
/// A trait that allows to optimize an abstract syntax tree
|
||||||
pub trait AstOptimizer {
|
pub trait AstOptimizer {
|
||||||
|
/// Consume an abstract syntax tree and return an ast that has the same functionality but with
|
||||||
|
/// optional optimizations.
|
||||||
fn optimize(ast: Ast) -> Ast;
|
fn optimize(ast: Ast) -> Ast;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A very simple optimizer that applies trivial optimizations like precalculating expressions that
|
||||||
|
/// have only literals as operands
|
||||||
pub struct SimpleAstOptimizer;
|
pub struct SimpleAstOptimizer;
|
||||||
|
|
||||||
impl AstOptimizer for SimpleAstOptimizer {
|
impl AstOptimizer for SimpleAstOptimizer {
|
||||||
|
|||||||
@ -10,6 +10,7 @@ use crate::{
|
|||||||
stringstore::{Sid, StringStore},
|
stringstore::{Sid, StringStore},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Runtime errors that can occur during execution
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum RuntimeError {
|
pub enum RuntimeError {
|
||||||
#[error("Invalid array Index: {0:?}")]
|
#[error("Invalid array Index: {0:?}")]
|
||||||
@ -37,41 +38,62 @@ pub enum RuntimeError {
|
|||||||
InvalidNumberOfArgs(String, usize, usize),
|
InvalidNumberOfArgs(String, usize, usize),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Possible variants for the values
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub enum Value {
|
pub enum Value {
|
||||||
|
/// 64-bit integer value
|
||||||
I64(i64),
|
I64(i64),
|
||||||
|
/// String value
|
||||||
String(Sid),
|
String(Sid),
|
||||||
|
/// Array value
|
||||||
Array(Rc<RefCell<Vec<Value>>>),
|
Array(Rc<RefCell<Vec<Value>>>),
|
||||||
|
/// Void value
|
||||||
Void,
|
Void,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The exit type of a block. When a block ends, the exit type specifies why the block ended.
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub enum BlockExit {
|
pub enum BlockExit {
|
||||||
|
/// Normal exit when the block just ends normally (no returns / breaks / continues / etc.)
|
||||||
Normal,
|
Normal,
|
||||||
|
/// The block ended through a break statement. This will be propagated up to the next loop
|
||||||
|
/// and cause it to fully terminate
|
||||||
Break,
|
Break,
|
||||||
|
/// The block ended through a continue statement. This will be propagated up to the next loop
|
||||||
|
/// and cause it to start the next iteration
|
||||||
Continue,
|
Continue,
|
||||||
|
/// The block ended through a return statement. This will propagate up to the next function
|
||||||
|
/// body end
|
||||||
Return(Value),
|
Return(Value),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct Interpreter {
|
pub struct Interpreter {
|
||||||
|
/// Run the SimpleAstOptimizer over the Ast before executing
|
||||||
pub optimize_ast: bool,
|
pub optimize_ast: bool,
|
||||||
|
|
||||||
|
/// Print the tokens after lexing
|
||||||
pub print_tokens: bool,
|
pub print_tokens: bool,
|
||||||
|
/// Print the ast after parsing
|
||||||
pub print_ast: bool,
|
pub print_ast: bool,
|
||||||
|
|
||||||
|
/// Capture the output values of print statements instead of printing them to the terminal
|
||||||
pub capture_output: bool,
|
pub capture_output: bool,
|
||||||
|
/// The stored values that were captured
|
||||||
output: Vec<Value>,
|
output: Vec<Value>,
|
||||||
|
|
||||||
// Variable table stores the runtime values of variables
|
/// Variable table stores the runtime values of variables as a stack
|
||||||
vartable: Vec<Value>,
|
vartable: Vec<Value>,
|
||||||
|
|
||||||
|
/// Function table stores the functions during runtime as a stack
|
||||||
funtable: Vec<FunDecl>,
|
funtable: Vec<FunDecl>,
|
||||||
|
|
||||||
|
/// The stringstore contains all strings used throughout the program
|
||||||
stringstore: StringStore,
|
stringstore: StringStore,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Interpreter {
|
impl Interpreter {
|
||||||
|
/// Create a new Interpreter
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
optimize_ast: true,
|
optimize_ast: true,
|
||||||
@ -79,20 +101,28 @@ impl Interpreter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get the captured output
|
||||||
pub fn output(&self) -> &[Value] {
|
pub fn output(&self) -> &[Value] {
|
||||||
&self.output
|
&self.output
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Try to retrieve a variable value from the varstack. The idx is the index from the back of
|
||||||
|
/// the stack. So 0 is the last value, not the first
|
||||||
fn get_var(&self, idx: usize) -> Option<Value> {
|
fn get_var(&self, idx: usize) -> Option<Value> {
|
||||||
self.vartable.get(self.vartable.len() - idx - 1).cloned()
|
self.vartable.get(self.vartable.len() - idx - 1).cloned()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Try to retrieve a mutable reference to a variable value from the varstack. The idx is the
|
||||||
|
/// index from the back of the stack. So 0 is the last value, not the first
|
||||||
fn get_var_mut(&mut self, idx: usize) -> Option<&mut Value> {
|
fn get_var_mut(&mut self, idx: usize) -> Option<&mut Value> {
|
||||||
let idx = self.vartable.len() - idx - 1;
|
let idx = self.vartable.len() - idx - 1;
|
||||||
self.vartable.get_mut(idx)
|
self.vartable.get_mut(idx)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Lex, parse and then run the given sourcecode. This will terminate the program when an error
|
||||||
|
/// occurs and print an appropriate error message.
|
||||||
pub fn run_str(&mut self, code: &str) {
|
pub fn run_str(&mut self, code: &str) {
|
||||||
|
// Lex the tokens
|
||||||
let tokens = match lex(code) {
|
let tokens = match lex(code) {
|
||||||
Ok(tokens) => tokens,
|
Ok(tokens) => tokens,
|
||||||
Err(e) => nice_panic!("Lexing error: {}", e),
|
Err(e) => nice_panic!("Lexing error: {}", e),
|
||||||
@ -102,18 +132,22 @@ impl Interpreter {
|
|||||||
println!("Tokens: {:?}", tokens);
|
println!("Tokens: {:?}", tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse the ast
|
||||||
let ast = match parse(tokens) {
|
let ast = match parse(tokens) {
|
||||||
Ok(ast) => ast,
|
Ok(ast) => ast,
|
||||||
Err(e) => nice_panic!("Parsing error: {}", e),
|
Err(e) => nice_panic!("Parsing error: {}", e),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Run the ast
|
||||||
match self.run_ast(ast) {
|
match self.run_ast(ast) {
|
||||||
Ok(_) => (),
|
Ok(_) => (),
|
||||||
Err(e) => nice_panic!("Runtime error: {}", e),
|
Err(e) => nice_panic!("Runtime error: {}", e),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Execute the given Ast within the interpreter
|
||||||
pub fn run_ast(&mut self, mut ast: Ast) -> Result<(), RuntimeError> {
|
pub fn run_ast(&mut self, mut ast: Ast) -> Result<(), RuntimeError> {
|
||||||
|
// Optimize the ast
|
||||||
if self.optimize_ast {
|
if self.optimize_ast {
|
||||||
ast = SimpleAstOptimizer::optimize(ast);
|
ast = SimpleAstOptimizer::optimize(ast);
|
||||||
}
|
}
|
||||||
@ -122,16 +156,22 @@ impl Interpreter {
|
|||||||
println!("{:#?}", ast.main);
|
println!("{:#?}", ast.main);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Take over the stringstore of the given ast
|
||||||
self.stringstore = ast.stringstore;
|
self.stringstore = ast.stringstore;
|
||||||
|
|
||||||
|
// Run the top level block (the main)
|
||||||
self.run_block(&ast.main)?;
|
self.run_block(&ast.main)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Run all statements in the given block
|
||||||
pub fn run_block(&mut self, prog: &BlockScope) -> Result<BlockExit, RuntimeError> {
|
pub fn run_block(&mut self, prog: &BlockScope) -> Result<BlockExit, RuntimeError> {
|
||||||
self.run_block_fp_offset(prog, 0)
|
self.run_block_fp_offset(prog, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Same as run_block, but with an additional framepointer offset. This allows to free more
|
||||||
|
/// values from the stack than normally and can be used when passing arguments inside a
|
||||||
|
/// function body scope from the outside
|
||||||
pub fn run_block_fp_offset(
|
pub fn run_block_fp_offset(
|
||||||
&mut self,
|
&mut self,
|
||||||
prog: &BlockScope,
|
prog: &BlockScope,
|
||||||
@ -139,7 +179,9 @@ impl Interpreter {
|
|||||||
) -> Result<BlockExit, RuntimeError> {
|
) -> Result<BlockExit, RuntimeError> {
|
||||||
let framepointer = self.vartable.len() - framepointer_offset;
|
let framepointer = self.vartable.len() - framepointer_offset;
|
||||||
|
|
||||||
for stmt in prog {
|
let mut block_exit = BlockExit::Normal;
|
||||||
|
|
||||||
|
'blockloop: for stmt in prog {
|
||||||
match stmt {
|
match stmt {
|
||||||
Statement::Break => return Ok(BlockExit::Break),
|
Statement::Break => return Ok(BlockExit::Break),
|
||||||
Statement::Continue => return Ok(BlockExit::Continue),
|
Statement::Continue => return Ok(BlockExit::Continue),
|
||||||
@ -147,8 +189,8 @@ impl Interpreter {
|
|||||||
Statement::Return(expr) => {
|
Statement::Return(expr) => {
|
||||||
let val = self.resolve_expr(expr)?;
|
let val = self.resolve_expr(expr)?;
|
||||||
|
|
||||||
self.vartable.truncate(framepointer);
|
block_exit = BlockExit::Return(val);
|
||||||
return Ok(BlockExit::Return(val));
|
break 'blockloop;
|
||||||
}
|
}
|
||||||
|
|
||||||
Statement::Expr(expr) => {
|
Statement::Expr(expr) => {
|
||||||
@ -163,8 +205,8 @@ impl Interpreter {
|
|||||||
Statement::Block(block) => match self.run_block(block)? {
|
Statement::Block(block) => match self.run_block(block)? {
|
||||||
// Propagate return, continue and break
|
// Propagate return, continue and break
|
||||||
be @ (BlockExit::Return(_) | BlockExit::Continue | BlockExit::Break) => {
|
be @ (BlockExit::Return(_) | BlockExit::Continue | BlockExit::Break) => {
|
||||||
self.vartable.truncate(framepointer);
|
block_exit = be;
|
||||||
return Ok(be);
|
break 'blockloop;
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
},
|
},
|
||||||
@ -172,23 +214,26 @@ impl Interpreter {
|
|||||||
Statement::Loop(looop) => {
|
Statement::Loop(looop) => {
|
||||||
// loop runs as long as condition != 0
|
// loop runs as long as condition != 0
|
||||||
loop {
|
loop {
|
||||||
|
// Check the loop condition
|
||||||
if let Some(condition) = &looop.condition {
|
if let Some(condition) = &looop.condition {
|
||||||
if matches!(self.resolve_expr(condition)?, Value::I64(0)) {
|
if matches!(self.resolve_expr(condition)?, Value::I64(0)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Run the body
|
||||||
let be = self.run_block(&looop.body)?;
|
let be = self.run_block(&looop.body)?;
|
||||||
match be {
|
match be {
|
||||||
// Propagate return
|
// Propagate return
|
||||||
be @ BlockExit::Return(_) => {
|
be @ BlockExit::Return(_) => {
|
||||||
self.vartable.truncate(framepointer);
|
block_exit = be;
|
||||||
return Ok(be);
|
break 'blockloop;
|
||||||
}
|
}
|
||||||
BlockExit::Break => break,
|
BlockExit::Break => break,
|
||||||
BlockExit::Continue | BlockExit::Normal => (),
|
BlockExit::Continue | BlockExit::Normal => (),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Run the advancement
|
||||||
if let Some(adv) = &looop.advancement {
|
if let Some(adv) = &looop.advancement {
|
||||||
self.resolve_expr(&adv)?;
|
self.resolve_expr(&adv)?;
|
||||||
}
|
}
|
||||||
@ -210,6 +255,7 @@ impl Interpreter {
|
|||||||
body_true,
|
body_true,
|
||||||
body_false,
|
body_false,
|
||||||
}) => {
|
}) => {
|
||||||
|
// Run the right block depending on the condition's result being 0 or not
|
||||||
let exit = if matches!(self.resolve_expr(condition)?, Value::I64(0)) {
|
let exit = if matches!(self.resolve_expr(condition)?, Value::I64(0)) {
|
||||||
self.run_block(body_false)?
|
self.run_block(body_false)?
|
||||||
} else {
|
} else {
|
||||||
@ -219,8 +265,8 @@ impl Interpreter {
|
|||||||
match exit {
|
match exit {
|
||||||
// Propagate return, continue and break
|
// Propagate return, continue and break
|
||||||
be @ (BlockExit::Return(_) | BlockExit::Continue | BlockExit::Break) => {
|
be @ (BlockExit::Return(_) | BlockExit::Continue | BlockExit::Break) => {
|
||||||
self.vartable.truncate(framepointer);
|
block_exit = be;
|
||||||
return Ok(be);
|
break 'blockloop;
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
@ -234,9 +280,10 @@ impl Interpreter {
|
|||||||
|
|
||||||
self.vartable.truncate(framepointer);
|
self.vartable.truncate(framepointer);
|
||||||
|
|
||||||
Ok(BlockExit::Normal)
|
Ok(block_exit)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Execute the given expression to retrieve the resulting value
|
||||||
fn resolve_expr(&mut self, expr: &Expression) -> Result<Value, RuntimeError> {
|
fn resolve_expr(&mut self, expr: &Expression) -> Result<Value, RuntimeError> {
|
||||||
let val = match expr {
|
let val = match expr {
|
||||||
Expression::I64(val) => Value::I64(*val),
|
Expression::I64(val) => Value::I64(*val),
|
||||||
@ -271,6 +318,7 @@ impl Interpreter {
|
|||||||
// Function existence has been verified in the parser, so unwrap here shouldn't fail
|
// Function existence has been verified in the parser, so unwrap here shouldn't fail
|
||||||
let expected_num_args = self.funtable.get(*fun_stackpos).unwrap().argnames.len();
|
let expected_num_args = self.funtable.get(*fun_stackpos).unwrap().argnames.len();
|
||||||
|
|
||||||
|
// Check if the number of provided arguments matches the number of expected arguments
|
||||||
if expected_num_args != args_len {
|
if expected_num_args != args_len {
|
||||||
let fun_name = self
|
let fun_name = self
|
||||||
.stringstore
|
.stringstore
|
||||||
@ -284,6 +332,7 @@ impl Interpreter {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Run the function body and return the BlockExit type
|
||||||
match self.run_block_fp_offset(
|
match self.run_block_fp_offset(
|
||||||
&Rc::clone(&self.funtable.get(*fun_stackpos).unwrap().body),
|
&Rc::clone(&self.funtable.get(*fun_stackpos).unwrap().body),
|
||||||
expected_num_args,
|
expected_num_args,
|
||||||
@ -297,17 +346,23 @@ impl Interpreter {
|
|||||||
Ok(val)
|
Ok(val)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Retrieve the value of a given array at the specified index from the varstack. The name is
|
||||||
|
/// given as a StringID and is used to reference the variable name in case of an error. The
|
||||||
|
/// idx is the stackpos where the array variable should be located and the arr_idx is the
|
||||||
|
/// actual array access index, given as an expression.
|
||||||
fn resolve_array_access(
|
fn resolve_array_access(
|
||||||
&mut self,
|
&mut self,
|
||||||
name: Sid,
|
name: Sid,
|
||||||
idx: usize,
|
idx: usize,
|
||||||
arr_idx: &Expression,
|
arr_idx: &Expression,
|
||||||
) -> Result<Value, RuntimeError> {
|
) -> Result<Value, RuntimeError> {
|
||||||
|
// Resolve the array index into a value and check if it is a valid array index
|
||||||
let arr_idx = match self.resolve_expr(arr_idx)? {
|
let arr_idx = match self.resolve_expr(arr_idx)? {
|
||||||
Value::I64(size) if !size.is_negative() => size,
|
Value::I64(size) if !size.is_negative() => size,
|
||||||
val => return Err(RuntimeError::InvalidArrayIndex(val)),
|
val => return Err(RuntimeError::InvalidArrayIndex(val)),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Get the array value
|
||||||
let val = match self.get_var(idx) {
|
let val = match self.get_var(idx) {
|
||||||
Some(val) => val,
|
Some(val) => val,
|
||||||
None => {
|
None => {
|
||||||
@ -320,6 +375,7 @@ impl Interpreter {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Make sure it is an array
|
||||||
let arr = match val {
|
let arr = match val {
|
||||||
Value::Array(arr) => arr,
|
Value::Array(arr) => arr,
|
||||||
_ => {
|
_ => {
|
||||||
@ -332,12 +388,16 @@ impl Interpreter {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let arr = arr.borrow_mut();
|
// Get the value of the requested cell inside the array
|
||||||
|
let arr = arr.borrow();
|
||||||
arr.get(arr_idx as usize)
|
arr.get(arr_idx as usize)
|
||||||
.cloned()
|
.cloned()
|
||||||
.ok_or(RuntimeError::ArrayOutOfBounds(arr_idx as usize, arr.len()))
|
.ok_or(RuntimeError::ArrayOutOfBounds(arr_idx as usize, arr.len()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Retrieve the value of a given variable from the varstack. The name is given as a StringID
|
||||||
|
/// and is used to reference the variable name in case of an error. The idx is the stackpos
|
||||||
|
/// where the variable should be located
|
||||||
fn resolve_var(&mut self, name: Sid, idx: usize) -> Result<Value, RuntimeError> {
|
fn resolve_var(&mut self, name: Sid, idx: usize) -> Result<Value, RuntimeError> {
|
||||||
match self.get_var(idx) {
|
match self.get_var(idx) {
|
||||||
Some(val) => Ok(val),
|
Some(val) => Ok(val),
|
||||||
@ -352,9 +412,12 @@ impl Interpreter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Execute a unary operation and get the resulting value
|
||||||
fn resolve_unop(&mut self, uo: &UnOpType, operand: &Expression) -> Result<Value, RuntimeError> {
|
fn resolve_unop(&mut self, uo: &UnOpType, operand: &Expression) -> Result<Value, RuntimeError> {
|
||||||
|
// Recursively resolve the operand's expression into an actual value
|
||||||
let operand = self.resolve_expr(operand)?;
|
let operand = self.resolve_expr(operand)?;
|
||||||
|
|
||||||
|
// Perform the correct operation, considering the operation and value type
|
||||||
Ok(match (operand, uo) {
|
Ok(match (operand, uo) {
|
||||||
(Value::I64(val), UnOpType::Negate) => Value::I64(-val),
|
(Value::I64(val), UnOpType::Negate) => Value::I64(-val),
|
||||||
(Value::I64(val), UnOpType::BNot) => Value::I64(!val),
|
(Value::I64(val), UnOpType::BNot) => Value::I64(!val),
|
||||||
@ -363,6 +426,7 @@ impl Interpreter {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Execute a binary operation and get the resulting value
|
||||||
fn resolve_binop(
|
fn resolve_binop(
|
||||||
&mut self,
|
&mut self,
|
||||||
bo: &BinOpType,
|
bo: &BinOpType,
|
||||||
@ -371,8 +435,11 @@ impl Interpreter {
|
|||||||
) -> Result<Value, RuntimeError> {
|
) -> Result<Value, RuntimeError> {
|
||||||
let rhs = self.resolve_expr(rhs)?;
|
let rhs = self.resolve_expr(rhs)?;
|
||||||
|
|
||||||
|
// Handle assignments separate from the other binary operations
|
||||||
match (&bo, &lhs) {
|
match (&bo, &lhs) {
|
||||||
|
// Normal variable assignment
|
||||||
(BinOpType::Assign, Expression::Var(name, idx)) => {
|
(BinOpType::Assign, Expression::Var(name, idx)) => {
|
||||||
|
// Get the variable mutably and assign the right hand side value
|
||||||
match self.get_var_mut(*idx) {
|
match self.get_var_mut(*idx) {
|
||||||
Some(val) => *val = rhs.clone(),
|
Some(val) => *val = rhs.clone(),
|
||||||
None => {
|
None => {
|
||||||
@ -384,14 +451,18 @@ impl Interpreter {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Ok(rhs);
|
return Ok(rhs);
|
||||||
}
|
}
|
||||||
|
// Array index assignment
|
||||||
(BinOpType::Assign, Expression::ArrayAccess(name, idx, arr_idx)) => {
|
(BinOpType::Assign, Expression::ArrayAccess(name, idx, arr_idx)) => {
|
||||||
|
// Calculate the array index
|
||||||
let arr_idx = match self.resolve_expr(arr_idx)? {
|
let arr_idx = match self.resolve_expr(arr_idx)? {
|
||||||
Value::I64(size) if !size.is_negative() => size,
|
Value::I64(size) if !size.is_negative() => size,
|
||||||
val => return Err(RuntimeError::InvalidArrayIndex(val)),
|
val => return Err(RuntimeError::InvalidArrayIndex(val)),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Get the mutable ref to the array variable
|
||||||
let val = match self.get_var_mut(*idx) {
|
let val = match self.get_var_mut(*idx) {
|
||||||
Some(val) => val,
|
Some(val) => val,
|
||||||
None => {
|
None => {
|
||||||
@ -404,7 +475,9 @@ impl Interpreter {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Verify that it actually is an array
|
||||||
match val {
|
match val {
|
||||||
|
// Assign the right hand side value to the array at the given index
|
||||||
Value::Array(arr) => arr.borrow_mut()[arr_idx as usize] = rhs.clone(),
|
Value::Array(arr) => arr.borrow_mut()[arr_idx as usize] = rhs.clone(),
|
||||||
_ => {
|
_ => {
|
||||||
return Err(RuntimeError::TryingToIndexNonArray(
|
return Err(RuntimeError::TryingToIndexNonArray(
|
||||||
@ -421,8 +494,14 @@ impl Interpreter {
|
|||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This code is only executed if the binop is not an assignment as the assignments return
|
||||||
|
// early
|
||||||
|
|
||||||
|
// Resolve the left hand side to the value
|
||||||
let lhs = self.resolve_expr(lhs)?;
|
let lhs = self.resolve_expr(lhs)?;
|
||||||
|
|
||||||
|
// Perform the appropriate calculations considering the operation type and datatypes of the
|
||||||
|
// two values
|
||||||
let result = match (lhs, rhs) {
|
let result = match (lhs, rhs) {
|
||||||
(Value::I64(lhs), Value::I64(rhs)) => match bo {
|
(Value::I64(lhs), Value::I64(rhs)) => match bo {
|
||||||
BinOpType::Add => Value::I64(lhs + rhs),
|
BinOpType::Add => Value::I64(lhs + rhs),
|
||||||
@ -456,6 +535,8 @@ impl Interpreter {
|
|||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get a string representation of the given value. This uses the interpreter's StringStore to
|
||||||
|
/// retrieve the text values of Strings
|
||||||
fn value_to_string(&self, val: &Value) -> String {
|
fn value_to_string(&self, val: &Value) -> String {
|
||||||
match val {
|
match val {
|
||||||
Value::I64(val) => format!("{}", val),
|
Value::I64(val) => format!("{}", val),
|
||||||
@ -476,6 +557,8 @@ mod test {
|
|||||||
use super::{Interpreter, Value};
|
use super::{Interpreter, Value};
|
||||||
use crate::ast::{BinOpType, Expression};
|
use crate::ast::{BinOpType, Expression};
|
||||||
|
|
||||||
|
/// Simple test to check if a simple expression is executed properly.
|
||||||
|
/// Full system tests from lexing to execution can be found in `lib.rs`
|
||||||
#[test]
|
#[test]
|
||||||
fn test_interpreter_expr() {
|
fn test_interpreter_expr() {
|
||||||
// Expression: 1 + 2 * 3 + 4
|
// Expression: 1 + 2 * 3 + 4
|
||||||
|
|||||||
42
src/lexer.rs
42
src/lexer.rs
@ -3,6 +3,7 @@ use thiserror::Error;
|
|||||||
|
|
||||||
use crate::{token::Token, T};
|
use crate::{token::Token, T};
|
||||||
|
|
||||||
|
/// Errors that can occur while lexing a given string
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum LexErr {
|
pub enum LexErr {
|
||||||
#[error("Failed to parse '{0}' as i64")]
|
#[error("Failed to parse '{0}' as i64")]
|
||||||
@ -24,8 +25,11 @@ pub fn lex(code: &str) -> Result<Vec<Token>, LexErr> {
|
|||||||
lexer.lex()
|
lexer.lex()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The lexer is created from a reference to a sourcecode string and is consumed to create a token
|
||||||
|
/// buffer from that sourcecode.
|
||||||
struct Lexer<'a> {
|
struct Lexer<'a> {
|
||||||
/// The sourcecode text as an iterator over the chars
|
/// The sourcecode text as a peekable iterator over the chars. Peekable allows for look-ahead
|
||||||
|
/// and the use of the Chars iterator allows to support unicode characters
|
||||||
code: Peekable<Chars<'a>>,
|
code: Peekable<Chars<'a>>,
|
||||||
/// The lexed tokens
|
/// The lexed tokens
|
||||||
tokens: Vec<Token>,
|
tokens: Vec<Token>,
|
||||||
@ -34,6 +38,8 @@ struct Lexer<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Lexer<'a> {
|
impl<'a> Lexer<'a> {
|
||||||
|
|
||||||
|
/// Create a new lexer from the given sourcecode
|
||||||
fn new(code: &'a str) -> Self {
|
fn new(code: &'a str) -> Self {
|
||||||
let code = code.chars().peekable();
|
let code = code.chars().peekable();
|
||||||
let tokens = Vec::new();
|
let tokens = Vec::new();
|
||||||
@ -45,14 +51,18 @@ impl<'a> Lexer<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Consume the lexer and try to lex the contained sourcecode into a token buffer
|
||||||
fn lex(mut self) -> Result<Vec<Token>, LexErr> {
|
fn lex(mut self) -> Result<Vec<Token>, LexErr> {
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
self.current_char = self.next();
|
self.current_char = self.next();
|
||||||
|
// Match on the current and next character. This gives a 1-char look-ahead and
|
||||||
|
// can be used to directly match 2-char tokens
|
||||||
match (self.current_char, self.peek()) {
|
match (self.current_char, self.peek()) {
|
||||||
// Stop lexing at EOF
|
// Stop lexing at EOF
|
||||||
('\0', _) => break,
|
('\0', _) => break,
|
||||||
|
|
||||||
// Skip whitespace
|
// Skip / ignore whitespace
|
||||||
(' ' | '\t' | '\n' | '\r', _) => (),
|
(' ' | '\t' | '\n' | '\r', _) => (),
|
||||||
|
|
||||||
// Line comment. Consume every char until linefeed (next line)
|
// Line comment. Consume every char until linefeed (next line)
|
||||||
@ -100,9 +110,10 @@ impl<'a> Lexer<'a> {
|
|||||||
// Lex multiple characters together as a string
|
// Lex multiple characters together as a string
|
||||||
('"', _) => self.lex_str()?,
|
('"', _) => self.lex_str()?,
|
||||||
|
|
||||||
// Lex multiple characters together as identifier
|
// Lex multiple characters together as identifier or keyword
|
||||||
('a'..='z' | 'A'..='Z' | '_', _) => self.lex_identifier()?,
|
('a'..='z' | 'A'..='Z' | '_', _) => self.lex_identifier()?,
|
||||||
|
|
||||||
|
// Any character that was not handled otherwise is invalid
|
||||||
(ch, _) => Err(LexErr::UnexpectedChar(ch))?,
|
(ch, _) => Err(LexErr::UnexpectedChar(ch))?,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -132,7 +143,8 @@ impl<'a> Lexer<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to convert the string representation of the value to i64
|
// Try to convert the string representation of the value to i64. The error is mapped to
|
||||||
|
// the appropriate LexErr
|
||||||
let i64val = sval.parse().map_err(|_| LexErr::NumericParse(sval))?;
|
let i64val = sval.parse().map_err(|_| LexErr::NumericParse(sval))?;
|
||||||
|
|
||||||
self.push_tok(T![i64(i64val)]);
|
self.push_tok(T![i64(i64val)]);
|
||||||
@ -143,24 +155,28 @@ impl<'a> Lexer<'a> {
|
|||||||
/// Lex characters as a string until encountering an unescaped closing doublequote char '"'.
|
/// Lex characters as a string until encountering an unescaped closing doublequote char '"'.
|
||||||
/// The successfully lexed string literal token is appended to the stored tokens.
|
/// The successfully lexed string literal token is appended to the stored tokens.
|
||||||
fn lex_str(&mut self) -> Result<(), LexErr> {
|
fn lex_str(&mut self) -> Result<(), LexErr> {
|
||||||
// Opening " was consumed in match
|
// The opening " was consumed in match, so a fresh string can be used
|
||||||
|
|
||||||
let mut text = String::new();
|
let mut text = String::new();
|
||||||
|
|
||||||
// Read all chars until encountering the closing "
|
// Read all chars until encountering the closing "
|
||||||
loop {
|
loop {
|
||||||
match self.peek() {
|
match self.peek() {
|
||||||
|
// An unescaped doublequote ends the current string
|
||||||
'"' => break,
|
'"' => break,
|
||||||
|
|
||||||
// If the end of file is reached while still waiting for '"', error out
|
// If the end of file is reached while still waiting for '"', error out
|
||||||
'\0' => Err(LexErr::MissingClosingString)?,
|
'\0' => Err(LexErr::MissingClosingString)?,
|
||||||
|
|
||||||
_ => match self.next() {
|
_ => match self.next() {
|
||||||
// Backshlash indicates an escaped character
|
// Backslash indicates an escaped character, so consume one more char and
|
||||||
|
// treat it as the escaped char
|
||||||
'\\' => match self.next() {
|
'\\' => match self.next() {
|
||||||
'n' => text.push('\n'),
|
'n' => text.push('\n'),
|
||||||
'r' => text.push('\r'),
|
'r' => text.push('\r'),
|
||||||
't' => text.push('\t'),
|
't' => text.push('\t'),
|
||||||
'\\' => text.push('\\'),
|
'\\' => text.push('\\'),
|
||||||
'"' => text.push('"'),
|
'"' => text.push('"'),
|
||||||
|
// If the escaped char is not handled, it is unsupported and an error
|
||||||
ch => Err(LexErr::InvalidStrEscape(ch))?,
|
ch => Err(LexErr::InvalidStrEscape(ch))?,
|
||||||
},
|
},
|
||||||
// All other characters are simply appended to the string
|
// All other characters are simply appended to the string
|
||||||
@ -219,18 +235,23 @@ impl<'a> Lexer<'a> {
|
|||||||
self.tokens.push(token);
|
self.tokens.push(token);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Same as `push_tok` but also consumes the next token, removing it from the code iter
|
/// Same as `push_tok` but also consumes the next token, removing it from the code iter. This
|
||||||
|
/// is useful when lexing double char tokens where the second token has only been peeked.
|
||||||
fn push_tok_consume(&mut self, token: Token) {
|
fn push_tok_consume(&mut self, token: Token) {
|
||||||
self.next();
|
self.next();
|
||||||
self.tokens.push(token);
|
self.tokens.push(token);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Advance to next character and return the removed char
|
/// Advance to next character and return the removed char. When the end of the code is reached,
|
||||||
|
/// `'\0'` is returned. This is used instead of an Option::None since it allows for much
|
||||||
|
/// shorter and cleaner code in the main loop. The `'\0'` character would not be valid anyways
|
||||||
fn next(&mut self) -> char {
|
fn next(&mut self) -> char {
|
||||||
self.code.next().unwrap_or('\0')
|
self.code.next().unwrap_or('\0')
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the next character without removing it
|
/// Get the next character without removing it. When the end of the code is reached,
|
||||||
|
/// `'\0'` is returned. This is used instead of an Option::None since it allows for much
|
||||||
|
/// shorter and cleaner code in the main loop. The `'\0'` character would not be valid anyways
|
||||||
fn peek(&mut self) -> char {
|
fn peek(&mut self) -> char {
|
||||||
self.code.peek().copied().unwrap_or('\0')
|
self.code.peek().copied().unwrap_or('\0')
|
||||||
}
|
}
|
||||||
@ -240,6 +261,7 @@ impl<'a> Lexer<'a> {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use crate::{lexer::lex, T};
|
use crate::{lexer::lex, T};
|
||||||
|
|
||||||
|
/// A general test to check if the lexer actually lexes tokens correctly
|
||||||
#[test]
|
#[test]
|
||||||
fn test_lexer() {
|
fn test_lexer() {
|
||||||
let code = r#"53+1-567_000 * / % | ~ ! < > & ^ ({[]});= <- >= <=
|
let code = r#"53+1-567_000 * / % | ~ ! < > & ^ ({[]});= <- >= <=
|
||||||
|
|||||||
@ -7,18 +7,25 @@ pub mod stringstore;
|
|||||||
pub mod astoptimizer;
|
pub mod astoptimizer;
|
||||||
pub mod util;
|
pub mod util;
|
||||||
|
|
||||||
|
/// A bunch of full program tests using the example code programs as test subjects.
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::interpreter::{Interpreter, Value};
|
use crate::interpreter::{Interpreter, Value};
|
||||||
use std::fs::read_to_string;
|
use std::fs::read_to_string;
|
||||||
|
|
||||||
|
/// Run a nek program with the given filename from the examples directory and assert the
|
||||||
|
/// captured output with the expected result. This only works if the program just outputs one
|
||||||
|
/// value as the result
|
||||||
fn run_example_check_single_i64_output(filename: &str, correct_result: i64) {
|
fn run_example_check_single_i64_output(filename: &str, correct_result: i64) {
|
||||||
let mut interpreter = Interpreter::new();
|
let mut interpreter = Interpreter::new();
|
||||||
|
// Enable output capturing. This captures all calls to `print`
|
||||||
interpreter.capture_output = true;
|
interpreter.capture_output = true;
|
||||||
|
|
||||||
|
// Load and run the given program
|
||||||
let code = read_to_string(format!("examples/{filename}")).unwrap();
|
let code = read_to_string(format!("examples/{filename}")).unwrap();
|
||||||
interpreter.run_str(&code);
|
interpreter.run_str(&code);
|
||||||
|
|
||||||
|
// Compare the captured output with the expected value
|
||||||
let expected_output = [Value::I64(correct_result)];
|
let expected_output = [Value::I64(correct_result)];
|
||||||
assert_eq!(interpreter.output(), &expected_output);
|
assert_eq!(interpreter.output(), &expected_output);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,6 +2,8 @@ use std::{env::args, fs, process::exit};
|
|||||||
|
|
||||||
use nek_lang::{interpreter::Interpreter, nice_panic};
|
use nek_lang::{interpreter::Interpreter, nice_panic};
|
||||||
|
|
||||||
|
/// Cli configuration flags and arguments. This could be done with `clap`, but since only so few
|
||||||
|
/// arguments are supported this seems kind of overkill.
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
struct CliConfig {
|
struct CliConfig {
|
||||||
print_tokens: bool,
|
print_tokens: bool,
|
||||||
@ -38,6 +40,7 @@ fn main() {
|
|||||||
Ok(code) => code,
|
Ok(code) => code,
|
||||||
Err(_) => nice_panic!("Error: Could not read file '{}'", file),
|
Err(_) => nice_panic!("Error: Could not read file '{}'", file),
|
||||||
};
|
};
|
||||||
|
// Lex, parse and run the program
|
||||||
interpreter.run_str(&code);
|
interpreter.run_str(&code);
|
||||||
} else {
|
} else {
|
||||||
println!("Error: No file given\n");
|
println!("Error: No file given\n");
|
||||||
|
|||||||
@ -8,24 +8,34 @@ use crate::{
|
|||||||
T,
|
T,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Errors that can occur while parsing
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum ParseErr {
|
pub enum ParseErr {
|
||||||
#[error("Unexpected Token \"{0:?}\", expected \"{1}\"")]
|
#[error("Unexpected Token \"{0:?}\", expected \"{1}\"")]
|
||||||
UnexpectedToken(Token, String),
|
UnexpectedToken(Token, String),
|
||||||
|
|
||||||
#[error("Left hand side of declaration is not a variable")]
|
#[error("Left hand side of declaration is not a variable")]
|
||||||
DeclarationOfNonVar,
|
DeclarationOfNonVar,
|
||||||
|
|
||||||
#[error("Use of undefined variable \"{0}\"")]
|
#[error("Use of undefined variable \"{0}\"")]
|
||||||
UseOfUndeclaredVar(String),
|
UseOfUndeclaredVar(String),
|
||||||
|
|
||||||
#[error("Use of undefined function \"{0}\"")]
|
#[error("Use of undefined function \"{0}\"")]
|
||||||
UseOfUndeclaredFun(String),
|
UseOfUndeclaredFun(String),
|
||||||
|
|
||||||
#[error("Redeclation of function \"{0}\"")]
|
#[error("Redeclation of function \"{0}\"")]
|
||||||
RedeclarationFun(String),
|
RedeclarationFun(String),
|
||||||
|
|
||||||
#[error("Function not declared at top level \"{0}\"")]
|
#[error("Function not declared at top level \"{0}\"")]
|
||||||
FunctionOnNonTopLevel(String),
|
FunctionOnNonTopLevel(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A result that can either be Ok, or a ParseErr
|
||||||
type ResPE<T> = Result<T, ParseErr>;
|
type ResPE<T> = Result<T, ParseErr>;
|
||||||
|
|
||||||
|
/// This macro can be used to quickly and easily assert if the next token is matching the expected
|
||||||
|
/// token and return an appropriate error if not. Since this is intended to be used inside the
|
||||||
|
/// parser, the first argument should always be `self`.
|
||||||
macro_rules! validate_next {
|
macro_rules! validate_next {
|
||||||
($self:ident, $expected_tok:pat, $expected_str:expr) => {
|
($self:ident, $expected_tok:pat, $expected_str:expr) => {
|
||||||
match $self.next() {
|
match $self.next() {
|
||||||
@ -41,6 +51,7 @@ pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A
|
|||||||
parser.parse()
|
parser.parse()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A parser that takes in a Token Stream and can create a full abstract syntax tree from it.
|
||||||
struct Parser<T: Iterator<Item = Token>> {
|
struct Parser<T: Iterator<Item = Token>> {
|
||||||
tokens: PutBackIter<T>,
|
tokens: PutBackIter<T>,
|
||||||
string_store: StringStore,
|
string_store: StringStore,
|
||||||
@ -65,6 +76,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Consume the parser and try to create the abstract syntax tree from the token stream
|
||||||
pub fn parse(mut self) -> ResPE<Ast> {
|
pub fn parse(mut self) -> ResPE<Ast> {
|
||||||
let main = self.parse_scoped_block()?;
|
let main = self.parse_scoped_block()?;
|
||||||
Ok(Ast {
|
Ok(Ast {
|
||||||
@ -73,25 +85,32 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse a series of statements together as a BlockScope. This will continuously parse
|
||||||
|
/// statements until encountering end-of-file or a block end '}' .
|
||||||
fn parse_scoped_block(&mut self) -> ResPE<BlockScope> {
|
fn parse_scoped_block(&mut self) -> ResPE<BlockScope> {
|
||||||
self.parse_scoped_block_fp_offset(0)
|
self.parse_scoped_block_fp_offset(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse tokens into an abstract syntax tree. This will continuously parse statements until
|
/// Same as parse_scoped_block, but an offset to the framepointer can be specified to allow
|
||||||
/// encountering end-of-file or a block end '}' .
|
/// for easily passing variables into scopes from the outside. This is used when parsing
|
||||||
fn parse_scoped_block_fp_offset(&mut self, framepoint_offset: usize) -> ResPE<BlockScope> {
|
/// function calls
|
||||||
|
fn parse_scoped_block_fp_offset(&mut self, framepointer_offset: usize) -> ResPE<BlockScope> {
|
||||||
self.nesting_level += 1;
|
self.nesting_level += 1;
|
||||||
let framepointer = self.var_stack.len() - framepoint_offset;
|
let framepointer = self.var_stack.len() - framepointer_offset;
|
||||||
let mut prog = Vec::new();
|
let mut prog = Vec::new();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
match self.peek() {
|
match self.peek() {
|
||||||
|
// Just a semicolon is an empty statement. So just consume it
|
||||||
T![;] => {
|
T![;] => {
|
||||||
self.next();
|
self.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// '}' ends the current block and EoF ends everything, as the end of the tokenstream
|
||||||
|
// is reached
|
||||||
T![EoF] | T!['}'] => break,
|
T![EoF] | T!['}'] => break,
|
||||||
|
|
||||||
|
// Create a new scoped block
|
||||||
T!['{'] => {
|
T!['{'] => {
|
||||||
self.next();
|
self.next();
|
||||||
prog.push(Statement::Block(self.parse_scoped_block()?));
|
prog.push(Statement::Block(self.parse_scoped_block()?));
|
||||||
@ -99,49 +118,57 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
validate_next!(self, T!['}'], "}");
|
validate_next!(self, T!['}'], "}");
|
||||||
}
|
}
|
||||||
|
|
||||||
// By default try to lex a statement
|
// By default try to parse statements
|
||||||
_ => prog.push(self.parse_stmt()?),
|
_ => prog.push(self.parse_stmt()?),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reset the stack to where it was before entering the scope
|
||||||
self.var_stack.truncate(framepointer);
|
self.var_stack.truncate(framepointer);
|
||||||
self.nesting_level -= 1;
|
self.nesting_level -= 1;
|
||||||
|
|
||||||
Ok(prog)
|
Ok(prog)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a single statement from the tokens.
|
/// Parse a single statement from the tokens
|
||||||
fn parse_stmt(&mut self) -> ResPE<Statement> {
|
fn parse_stmt(&mut self) -> ResPE<Statement> {
|
||||||
let stmt = match self.peek() {
|
let stmt = match self.peek() {
|
||||||
|
// Break statement
|
||||||
T![break] => {
|
T![break] => {
|
||||||
self.next();
|
self.next();
|
||||||
|
|
||||||
|
// After the statement, there must be a semicolon
|
||||||
validate_next!(self, T![;], ";");
|
validate_next!(self, T![;], ";");
|
||||||
|
|
||||||
Statement::Break
|
Statement::Break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Continue statement
|
||||||
T![continue] => {
|
T![continue] => {
|
||||||
self.next();
|
self.next();
|
||||||
|
|
||||||
|
// After the statement, there must be a semicolon
|
||||||
validate_next!(self, T![;], ";");
|
validate_next!(self, T![;], ";");
|
||||||
|
|
||||||
Statement::Continue
|
Statement::Continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Loop statement
|
||||||
T![loop] => Statement::Loop(self.parse_loop()?),
|
T![loop] => Statement::Loop(self.parse_loop()?),
|
||||||
|
|
||||||
|
// Print statement
|
||||||
T![print] => {
|
T![print] => {
|
||||||
self.next();
|
self.next();
|
||||||
|
|
||||||
let expr = self.parse_expr()?;
|
let expr = self.parse_expr()?;
|
||||||
|
|
||||||
// After a statement, there must be a semicolon
|
// After the statement, there must be a semicolon
|
||||||
validate_next!(self, T![;], ";");
|
validate_next!(self, T![;], ";");
|
||||||
|
|
||||||
Statement::Print(expr)
|
Statement::Print(expr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return statement
|
||||||
T![return] => {
|
T![return] => {
|
||||||
self.next();
|
self.next();
|
||||||
let stmt = Statement::Return(self.parse_expr()?);
|
let stmt = Statement::Return(self.parse_expr()?);
|
||||||
@ -152,23 +179,29 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
stmt
|
stmt
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If statement
|
||||||
T![if] => Statement::If(self.parse_if()?),
|
T![if] => Statement::If(self.parse_if()?),
|
||||||
|
|
||||||
|
// Function definition statement
|
||||||
T![fun] => {
|
T![fun] => {
|
||||||
|
|
||||||
self.next();
|
self.next();
|
||||||
|
|
||||||
|
// Expect an identifier as the function name
|
||||||
let fun_name = match self.next() {
|
let fun_name = match self.next() {
|
||||||
T![ident(fun_name)] => fun_name,
|
T![ident(fun_name)] => fun_name,
|
||||||
tok => return Err(ParseErr::UnexpectedToken(tok, "<ident>".to_string())),
|
tok => return Err(ParseErr::UnexpectedToken(tok, "<ident>".to_string())),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Only allow function definitions on the top level
|
||||||
if self.nesting_level > 1 {
|
if self.nesting_level > 1 {
|
||||||
return Err(ParseErr::FunctionOnNonTopLevel(fun_name));
|
return Err(ParseErr::FunctionOnNonTopLevel(fun_name));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Intern the function name
|
||||||
let fun_name = self.string_store.intern_or_lookup(&fun_name);
|
let fun_name = self.string_store.intern_or_lookup(&fun_name);
|
||||||
|
|
||||||
|
// Check if the function name already exists
|
||||||
if self.fun_stack.contains(&fun_name) {
|
if self.fun_stack.contains(&fun_name) {
|
||||||
return Err(ParseErr::RedeclarationFun(
|
return Err(ParseErr::RedeclarationFun(
|
||||||
self.string_store
|
self.string_store
|
||||||
@ -178,19 +211,24 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Put the function name on the function stack for precalculating the stack
|
||||||
|
// positions
|
||||||
let fun_stackpos = self.fun_stack.len();
|
let fun_stackpos = self.fun_stack.len();
|
||||||
self.fun_stack.push(fun_name);
|
self.fun_stack.push(fun_name);
|
||||||
|
|
||||||
|
|
||||||
let mut arg_names = Vec::new();
|
let mut arg_names = Vec::new();
|
||||||
|
|
||||||
validate_next!(self, T!['('], "(");
|
validate_next!(self, T!['('], "(");
|
||||||
|
|
||||||
|
// Parse the optional arguments inside the parentheses
|
||||||
while matches!(self.peek(), T![ident(_)]) {
|
while matches!(self.peek(), T![ident(_)]) {
|
||||||
let var_name = match self.next() {
|
let var_name = match self.next() {
|
||||||
T![ident(var_name)] => var_name,
|
T![ident(var_name)] => var_name,
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Intern argument names
|
||||||
let var_name = self.string_store.intern_or_lookup(&var_name);
|
let var_name = self.string_store.intern_or_lookup(&var_name);
|
||||||
arg_names.push(var_name);
|
arg_names.push(var_name);
|
||||||
|
|
||||||
@ -221,10 +259,13 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Either a variable declaration statement or an expression statement
|
||||||
_ => {
|
_ => {
|
||||||
|
// To decide if it is a declaration or an expression, a lookahead is needed
|
||||||
let first = self.next();
|
let first = self.next();
|
||||||
|
|
||||||
let stmt = match (first, self.peek()) {
|
let stmt = match (first, self.peek()) {
|
||||||
|
// Identifier and "<-" is a declaration
|
||||||
(T![ident(name)], T![<-]) => {
|
(T![ident(name)], T![<-]) => {
|
||||||
self.next();
|
self.next();
|
||||||
|
|
||||||
@ -240,7 +281,9 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
rhs,
|
rhs,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
// Anything else must be an expression
|
||||||
(first, _) => {
|
(first, _) => {
|
||||||
|
// Put the first token back in order for the parse_expr to see it
|
||||||
self.putback(first);
|
self.putback(first);
|
||||||
Statement::Expr(self.parse_expr()?)
|
Statement::Expr(self.parse_expr()?)
|
||||||
}
|
}
|
||||||
@ -269,6 +312,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
|
|
||||||
let mut body_false = BlockScope::default();
|
let mut body_false = BlockScope::default();
|
||||||
|
|
||||||
|
// Optionally parse the else part
|
||||||
if self.peek() == &T![else] {
|
if self.peek() == &T![else] {
|
||||||
self.next();
|
self.next();
|
||||||
|
|
||||||
@ -293,9 +337,11 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
let mut condition = None;
|
let mut condition = None;
|
||||||
let mut advancement = None;
|
let mut advancement = None;
|
||||||
|
|
||||||
|
// Check if the optional condition is present
|
||||||
if !matches!(self.peek(), T!['{']) {
|
if !matches!(self.peek(), T!['{']) {
|
||||||
condition = Some(self.parse_expr()?);
|
condition = Some(self.parse_expr()?);
|
||||||
|
|
||||||
|
// Check if the optional advancement is present
|
||||||
if matches!(self.peek(), T![;]) {
|
if matches!(self.peek(), T![;]) {
|
||||||
self.next();
|
self.next();
|
||||||
advancement = Some(self.parse_expr()?);
|
advancement = Some(self.parse_expr()?);
|
||||||
@ -321,7 +367,9 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
self.parse_expr_precedence(lhs, 0)
|
self.parse_expr_precedence(lhs, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse binary expressions with a precedence equal to or higher than min_prec
|
/// Parse binary expressions with a precedence equal to or higher than min_prec.
|
||||||
|
/// This uses the precedence climbing method for dealing with the operator precedences:
|
||||||
|
/// https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method
|
||||||
fn parse_expr_precedence(&mut self, mut lhs: Expression, min_prec: u8) -> ResPE<Expression> {
|
fn parse_expr_precedence(&mut self, mut lhs: Expression, min_prec: u8) -> ResPE<Expression> {
|
||||||
while let Some(binop) = &self.peek().try_to_binop() {
|
while let Some(binop) = &self.peek().try_to_binop() {
|
||||||
// Stop if the next operator has a lower binding power
|
// Stop if the next operator has a lower binding power
|
||||||
@ -349,7 +397,8 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
Ok(lhs)
|
Ok(lhs)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a primary expression (for now only number)
|
/// Parse a primary expression. A primary can be a literal value, variable, function call,
|
||||||
|
/// array indexing, parentheses grouping or a unary operation
|
||||||
fn parse_primary(&mut self) -> ResPE<Expression> {
|
fn parse_primary(&mut self) -> ResPE<Expression> {
|
||||||
let primary = match self.next() {
|
let primary = match self.next() {
|
||||||
// Literal i64
|
// Literal i64
|
||||||
@ -370,6 +419,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
// Array access, aka indexing. An ident followed by square brackets containing the
|
// Array access, aka indexing. An ident followed by square brackets containing the
|
||||||
// index as an expression
|
// index as an expression
|
||||||
T![ident(name)] if self.peek() == &T!['['] => {
|
T![ident(name)] if self.peek() == &T!['['] => {
|
||||||
|
// Get the stack position of the array variable
|
||||||
let sid = self.string_store.intern_or_lookup(&name);
|
let sid = self.string_store.intern_or_lookup(&name);
|
||||||
let stackpos = self.get_stackpos(sid)?;
|
let stackpos = self.get_stackpos(sid)?;
|
||||||
|
|
||||||
@ -382,6 +432,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
Expression::ArrayAccess(sid, stackpos, index.into())
|
Expression::ArrayAccess(sid, stackpos, index.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Identifier followed by parenthesis is a function call
|
||||||
T![ident(name)] if self.peek() == &T!['('] => {
|
T![ident(name)] if self.peek() == &T!['('] => {
|
||||||
// Skip the opening parenthesis
|
// Skip the opening parenthesis
|
||||||
self.next();
|
self.next();
|
||||||
@ -390,6 +441,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
|
|
||||||
let mut args = Vec::new();
|
let mut args = Vec::new();
|
||||||
|
|
||||||
|
// Parse the arguments as expressions
|
||||||
while !matches!(self.peek(), T![')']) {
|
while !matches!(self.peek(), T![')']) {
|
||||||
let arg = self.parse_expr()?;
|
let arg = self.parse_expr()?;
|
||||||
args.push(arg);
|
args.push(arg);
|
||||||
@ -402,19 +454,24 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
|
|
||||||
validate_next!(self, T![')'], ")");
|
validate_next!(self, T![')'], ")");
|
||||||
|
|
||||||
|
// Find the function stack position
|
||||||
let fun_stackpos = self.get_fun_stackpos(sid)?;
|
let fun_stackpos = self.get_fun_stackpos(sid)?;
|
||||||
|
|
||||||
Expression::FunCall(sid, fun_stackpos, args)
|
Expression::FunCall(sid, fun_stackpos, args)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Just an identifier is a variable
|
||||||
T![ident(name)] => {
|
T![ident(name)] => {
|
||||||
|
// Find the variable stack position
|
||||||
let sid = self.string_store.intern_or_lookup(&name);
|
let sid = self.string_store.intern_or_lookup(&name);
|
||||||
let stackpos = self.get_stackpos(sid)?;
|
let stackpos = self.get_stackpos(sid)?;
|
||||||
|
|
||||||
Expression::Var(sid, stackpos)
|
Expression::Var(sid, stackpos)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parentheses grouping
|
// Parentheses grouping
|
||||||
T!['('] => {
|
T!['('] => {
|
||||||
|
// Any other expression can be contained in between the parentheses
|
||||||
let inner_expr = self.parse_expr()?;
|
let inner_expr = self.parse_expr()?;
|
||||||
|
|
||||||
// Verify that there is a closing parenthesis
|
// Verify that there is a closing parenthesis
|
||||||
@ -425,7 +482,10 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
|
|
||||||
// Unary operations or invalid token
|
// Unary operations or invalid token
|
||||||
tok => match tok.try_to_unop() {
|
tok => match tok.try_to_unop() {
|
||||||
|
// If the token is a valid unary operation, parse it as such
|
||||||
Some(uot) => Expression::UnOp(uot, self.parse_primary()?.into()),
|
Some(uot) => Expression::UnOp(uot, self.parse_primary()?.into()),
|
||||||
|
|
||||||
|
// Otherwise it's an unexpected token
|
||||||
None => return Err(ParseErr::UnexpectedToken(tok, "primary".to_string())),
|
None => return Err(ParseErr::UnexpectedToken(tok, "primary".to_string())),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
@ -433,6 +493,8 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
Ok(primary)
|
Ok(primary)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Try to get the position of a variable on the variable stack. This is needed to precalculate
|
||||||
|
/// the stackpositions in order to save time when executing
|
||||||
fn get_stackpos(&self, varid: Sid) -> ResPE<usize> {
|
fn get_stackpos(&self, varid: Sid) -> ResPE<usize> {
|
||||||
self.var_stack
|
self.var_stack
|
||||||
.iter()
|
.iter()
|
||||||
@ -447,6 +509,8 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Try to get the position of a function on the function stack. This is needed to precalculate
|
||||||
|
/// the stackpositions in order to save time when executing
|
||||||
fn get_fun_stackpos(&self, varid: Sid) -> ResPE<usize> {
|
fn get_fun_stackpos(&self, varid: Sid) -> ResPE<usize> {
|
||||||
self.fun_stack
|
self.fun_stack
|
||||||
.iter()
|
.iter()
|
||||||
@ -461,16 +525,19 @@ impl<T: Iterator<Item = Token>> Parser<T> {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the next Token without removing it
|
/// Get the next Token without removing it. If there are no more tokens left, the EoF token is
|
||||||
|
/// returned. This follows the same reasoning as in the Lexer
|
||||||
fn peek(&mut self) -> &Token {
|
fn peek(&mut self) -> &Token {
|
||||||
self.tokens.peek().unwrap_or(&T![EoF])
|
self.tokens.peek().unwrap_or(&T![EoF])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Put a single token back into the token stream
|
||||||
fn putback(&mut self, tok: Token) {
|
fn putback(&mut self, tok: Token) {
|
||||||
self.tokens.putback(tok);
|
self.tokens.putback(tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Advance to next Token and return the removed Token
|
/// Advance to next Token and return the removed Token. If there are no more tokens left, the
|
||||||
|
/// EoF token is returned. This follows the same reasoning as in the Lexer
|
||||||
fn next(&mut self) -> Token {
|
fn next(&mut self) -> Token {
|
||||||
self.tokens.next().unwrap_or(T![EoF])
|
self.tokens.next().unwrap_or(T![EoF])
|
||||||
}
|
}
|
||||||
@ -484,6 +551,7 @@ mod tests {
|
|||||||
T,
|
T,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// A very simple test to check if the parser correctly parses a simple expression
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parser() {
|
fn test_parser() {
|
||||||
// Expression: 1 + 2 * 3 - 4
|
// Expression: 1 + 2 * 3 - 4
|
||||||
|
|||||||
@ -1,20 +1,35 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
/// A StringID that identifies a String inside the stringstore. This is only valid for the
|
||||||
|
/// StringStore that created the ID. These StringIDs can be trivially and cheaply copied
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
pub struct Sid(usize);
|
pub struct Sid(usize);
|
||||||
|
|
||||||
|
/// A Datastructure that stores strings, handing out StringIDs that can be used to retrieve the
|
||||||
|
/// real strings at a later point. This is called interning.
|
||||||
#[derive(Clone, Default)]
|
#[derive(Clone, Default)]
|
||||||
pub struct StringStore {
|
pub struct StringStore {
|
||||||
|
/// The actual strings that are stored in the StringStore. The StringIDs match the index of the
|
||||||
|
/// string inside of this strings vector
|
||||||
strings: Vec<String>,
|
strings: Vec<String>,
|
||||||
|
/// A Hashmap that allows to match already interned Strings to their StringID. This allows for
|
||||||
|
/// deduplication since the same string won't be stored twice
|
||||||
sids: HashMap<String, Sid>,
|
sids: HashMap<String, Sid>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl StringStore {
|
impl StringStore {
|
||||||
|
|
||||||
|
/// Create a new empty StringStore
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self { strings: Vec::new(), sids: HashMap::new() }
|
Self { strings: Vec::new(), sids: HashMap::new() }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Put the given string into the StringStore and get a StringID in return. If the string is
|
||||||
|
/// not yet stored, it will be after this.
|
||||||
|
///
|
||||||
|
/// Note: The generated StringIDs are only valid for the StringStore that created them. Using
|
||||||
|
/// the IDs with another StringStore gives unspecified results. It might return wrong Strings or
|
||||||
|
/// None.
|
||||||
pub fn intern_or_lookup(&mut self, text: &str) -> Sid {
|
pub fn intern_or_lookup(&mut self, text: &str) -> Sid {
|
||||||
self.sids.get(text).copied().unwrap_or_else(|| {
|
self.sids.get(text).copied().unwrap_or_else(|| {
|
||||||
let sid = Sid(self.strings.len());
|
let sid = Sid(self.strings.len());
|
||||||
@ -24,6 +39,11 @@ impl StringStore {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Look up and retrieve a string by the StringID. If the String is not found, None is returned.
|
||||||
|
///
|
||||||
|
/// Note: The generated StringIDs are only valid for the StringStore that created them. Using
|
||||||
|
/// the IDs with another StringStore gives unspecified results. It might return wrong Strings or
|
||||||
|
/// None.
|
||||||
pub fn lookup(&self, sid: Sid) -> Option<&String> {
|
pub fn lookup(&self, sid: Sid) -> Option<&String> {
|
||||||
self.strings.get(sid.0)
|
self.strings.get(sid.0)
|
||||||
}
|
}
|
||||||
|
|||||||
14
src/token.rs
14
src/token.rs
@ -64,6 +64,7 @@ pub enum Combo {
|
|||||||
LessThanMinus,
|
LessThanMinus,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tokens are a group of one or more source code characters that have a meaning together
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
pub enum Token {
|
pub enum Token {
|
||||||
/// Literal value token
|
/// Literal value token
|
||||||
@ -72,7 +73,7 @@ pub enum Token {
|
|||||||
/// Keyword token
|
/// Keyword token
|
||||||
Keyword(Keyword),
|
Keyword(Keyword),
|
||||||
|
|
||||||
/// Identifier (name for variables, functions, ...)
|
/// Identifier token (names for variables, functions, ...)
|
||||||
Ident(String),
|
Ident(String),
|
||||||
|
|
||||||
/// Combined tokens consisting of multiple characters
|
/// Combined tokens consisting of multiple characters
|
||||||
@ -87,7 +88,8 @@ pub enum Token {
|
|||||||
/// Semicolon (";")
|
/// Semicolon (";")
|
||||||
Semicolon,
|
Semicolon,
|
||||||
|
|
||||||
/// End of file
|
/// End of file (This is not generated by the lexer, but the parser uses this to find the
|
||||||
|
/// end of the token stream)
|
||||||
EoF,
|
EoF,
|
||||||
|
|
||||||
/// Left Bracket ("[")
|
/// Left Bracket ("[")
|
||||||
@ -182,6 +184,8 @@ impl Token {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// If the token can be used as a unary operation type, get the matching UnOpType. Otherwise
|
||||||
|
/// return None
|
||||||
pub fn try_to_unop(&self) -> Option<UnOpType> {
|
pub fn try_to_unop(&self) -> Option<UnOpType> {
|
||||||
Some(match self {
|
Some(match self {
|
||||||
T![-] => UnOpType::Negate,
|
T![-] => UnOpType::Negate,
|
||||||
@ -193,7 +197,11 @@ impl Token {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Macro to quickly create a token of the specified kind
|
/// Macro to quickly create a token of the specified kind. As this is implemented as a macro, it
|
||||||
|
/// can be used anywhere including in patterns.
|
||||||
|
///
|
||||||
|
/// An implementation should exist for each token, so that there is no need to ever write out the
|
||||||
|
/// long token definitions.
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
macro_rules! T {
|
macro_rules! T {
|
||||||
// Keywords
|
// Keywords
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user