Compare commits

...

30 Commits

Author SHA1 Message Date
6096bb431a Playing around with non enum based bytecode 2022-02-01 09:08:09 +01:00
85211b127d Impl missing operators for bytecode 2022-01-31 23:55:44 +01:00
02f258415d Impl dbgprint bytecode 2022-01-31 23:50:52 +01:00
5eae0712bf Add bytecode vm interpreter 2022-01-31 22:14:05 +01:00
e28b3c4f37 Partial refactoring of parser 2022-01-29 19:20:51 +01:00
9e3a642810 Refactor lexer 2022-01-29 14:55:22 +01:00
e62121c75b Implement for loop 2022-01-29 12:29:02 +01:00
ffdce64df8 Update README 2022-01-29 12:28:59 +01:00
d2daa7ae6d Implement non-debug print 2022-01-29 12:28:50 +01:00
abf9eb73c8 Implement strings 2022-01-29 12:28:46 +01:00
39b55b51da Improve runtime performance by 7x 2022-01-28 20:42:21 +01:00
5bf989a640 Implement simple cli 2022-01-28 20:22:50 +01:00
3dacee0be4 Implement debug print 2022-01-28 20:09:15 +01:00
d035724d20 Implement if else 2022-01-28 19:47:07 +01:00
24f5aa30ea Update README 2022-01-28 19:34:57 +01:00
2a014fd210 Implement while loop 2022-01-28 19:34:31 +01:00
788c4a8e82 Implement assignment as binop 2022-01-28 18:56:16 +01:00
7646177030 Implement variable declaration 2022-01-28 18:49:30 +01:00
b128b3357a Update grammar definition 2022-01-28 15:11:46 +01:00
4d5188d9d6 Implement relational binops
- Gt: Greater than
- Ge: Greater or equal
- Lt: Less than
- Le: Less or equal
2022-01-28 15:07:28 +01:00
e28a990b85 Update README 2022-01-28 14:58:21 +01:00
1f1f589dd4 Lex true/false as 1/0 2022-01-28 14:55:10 +01:00
6816392173 Implement equ, neq comparison 2022-01-28 14:46:55 +01:00
3c6fb5466e Implement unary negation 2022-01-28 14:21:57 +01:00
74dbf724a5 Implement parentheses grouping 2022-01-28 14:11:39 +01:00
807482583a Update grammar definition 2022-01-28 14:00:51 +01:00
7b86fecc6f Update README 2022-01-28 12:20:59 +01:00
6b91264f84 Implement more operators
- Mod
- Bitwise Or
- Bitwise And
- Bitwise Xor
- Shift Left
- Shift Right
2022-01-27 23:15:16 +01:00
d9246c7ea1 Implement div & sub 2022-01-27 22:29:06 +01:00
1c4943828f Number separator _ 2022-01-27 21:38:58 +01:00
10 changed files with 1312 additions and 146 deletions

View File

@ -2,6 +2,56 @@
## High level Components
- [ ] Lexer: Transforms text into Tokens
- [ ] Parser: Transforms Tokens into Abstract Syntax Tree
- [ ] Interpreter (tree-walk-interpreter): Walks the tree and evaluates the expressions / statements
- [x] Lexer: Transforms text into Tokens
- [x] Parser: Transforms Tokens into Abstract Syntax Tree
- [x] Interpreter (tree-walk-interpreter): Walks the tree and evaluates the expressions / statements
- [ ] Abstract Syntax Tree Optimizer
## Language features
- [x] Math expressions
- [x] Unary operators
- [x] Negate `-X`
- [x] Parentheses `(X+Y)*Z`
- [x] Logical boolean operators
- [x] Variables
- [x] Declaration
- [x] Assignment
- [x] While loop `while X { ... }`
- [x] If else statement `if X { ... } else { ... }`
- [x] If Statement
- [x] Else statement
- [ ] Line comments `//`
- [x] Strings
- [x] For loops `for X; Y; Z { ... }`
- [ ] IO Intrinsics
- [x] Print
- [ ] ReadLine
## Grammar
### Expressions
```
LITERAL = I64 | Str
expr_primary = LITERAL | IDENT | "(" expr ")" | "-" expr_primary
expr_mul = expr_primary (("*" | "/" | "%") expr_primary)*
expr_add = expr_mul (("+" | "-") expr_mul)*
expr_shift = expr_add ((">>" | "<<") expr_add)*
expr_rel = expr_shift ((">" | ">=" | "<" | "<=") expr_shift)*
expr_equ = expr_rel (("==" | "!=") expr_rel)*
expr_band = expr_equ ("&" expr_equ)*
expr_bxor = expr_band ("^" expr_band)*
expr_bor = expr_bxor ("|" expr_bxor)*
expr = expr_bor
```
### Statements
```
stmt_expr = expr
stmt_let = "let" IDENT "=" expr
stmt_while = "while" expr "{" (stmt)* "}"
stmt_for = "for" stmt_let ";" expr ";" expr "{" (stmt)* "}"
stmt_if = "if" expr "{" (stmt)* "}" ( "else" "{" (stmt)* "}" )?
stmt_dbgprint = "$$" expr
stmt_print = "$" expr
stmt = stmt_expr | stmt_let | stmt_while | stmt_for | stmt_if | stmt_dbgprint | stmt_print
```

104
src/ast.rs Normal file
View File

@ -0,0 +1,104 @@
use std::rc::Rc;
/// The kind of operation performed by a binary expression node.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum BinOpType {
    /// `+` addition
    Add,

    /// `-` subtraction
    Sub,

    /// `*` multiplication
    Mul,

    /// `/` division
    Div,

    /// `%` modulo
    Mod,

    /// `|` bitwise inclusive or
    BOr,

    /// `&` bitwise and
    BAnd,

    /// `^` bitwise exclusive or
    BXor,

    /// `<<` shift left
    Shl,

    /// `>>` shift right
    Shr,

    /// `==` equality comparison
    Equ,

    /// `!=` inequality comparison
    Neq,

    /// `>` greater than
    Gt,

    /// `>=` greater or equal
    Ge,

    /// `<` less than
    Lt,

    /// `<=` less or equal
    Le,

    /// `=` assignment to a variable
    Assign,
}
/// The kind of operation performed by a unary expression node.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum UnOpType {
    /// Arithmetic negation (`-X`)
    Neg,
}
/// Abstract syntax tree of a program or of a nested block: an ordered list of zero or more
/// statements.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Ast {
    /// The statements in source order.
    pub prog: Vec<Stmt>,
}
/// A single statement of the language.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Stmt {
    /// An expression used as a statement, e.g. an assignment or a calculation.
    Expr(Expr),

    /// Variable declaration with its initial value: `(variable name, assigned value)`.
    Let(String, Expr),

    /// While loop: `(condition, body)`.
    While(Expr, Ast),

    /// For loop: `((variable name, initial value), condition, advancement, body)`. The first
    /// element is the declaration that initializes the loop.
    For((String, Expr), Expr, Expr, Ast),

    /// If statement: `(condition, true_body, false_body)`.
    If(Expr, Ast, Ast),

    /// Debug print of an expression's value (shows the internal type together with the value).
    DbgPrint(Expr),

    /// Plain print of an expression's value.
    Print(Expr),
}
/// An expression node of the syntax tree.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Expr {
    /// 64-bit signed integer literal.
    I64(i64),

    /// String literal; the text is held behind a reference-counted pointer.
    Str(Rc<String>),

    /// Identifier, i.e. the name of a variable.
    Ident(String),

    /// Binary operation: `(operator, left hand side, right hand side)`.
    BinOp(BinOpType, Box<Expr>, Box<Expr>),

    /// Unary operation: `(operator, operand)`.
    UnOp(UnOpType, Box<Expr>),
}

211
src/bytecode.rs Normal file
View File

@ -0,0 +1,211 @@
use std::collections::HashMap;
use crate::ast::{Ast, Expr, Stmt, BinOpType};
/// Width of one word in the generated instruction stream.
type OpcodeSize = u32;

/// Opcodes of the bytecode format. An opcode is written to the instruction stream as its
/// `u32` discriminant.
///
/// The notes on the variants describe how the compiler in this file emits them; how the VM
/// executes each opcode is not visible from here.
#[repr(u32)]
#[derive(Debug, Clone, Copy)]
pub enum OP {
    /// Emitted for an integer literal; followed by the 64-bit value as two words, low word
    /// first.
    Push,
    /// Emitted after every expression statement.
    Pop,
    /// Emitted for a variable read; followed by the variable's 64-bit slot id.
    Load,
    /// Emitted for `let` and for assignment, after the code of the value expression; followed
    /// by the variable's 64-bit slot id.
    Store,

    // The following opcodes are emitted for the binary operator of the same meaning, after the
    // code of the left and then the right operand.
    /// Emitted for `BinOpType::Add`.
    Add,
    /// Emitted for `BinOpType::Sub`.
    Subtract,
    /// Emitted for `BinOpType::Mul`.
    Multiply,
    /// Emitted for `BinOpType::Div`.
    Divide,
    /// Emitted for `BinOpType::Mod`.
    Modulo,
    /// Emitted for `BinOpType::BOr`.
    BOr,
    /// Emitted for `BinOpType::BAnd`.
    BAnd,
    /// Emitted for `BinOpType::BXor`.
    BXor,
    /// Emitted for `BinOpType::Shl`.
    Shl,
    /// Emitted for `BinOpType::Shr`.
    Shr,
    /// Emitted for `BinOpType::Equ`.
    Eq,
    /// Emitted for `BinOpType::Neq`.
    Neq,
    /// Emitted for `BinOpType::Gt`.
    Gt,
    /// Emitted for `BinOpType::Ge`.
    Ge,
    /// Emitted for `BinOpType::Lt`.
    Lt,
    /// Emitted for `BinOpType::Le`.
    Le,

    /// Emitted at the end of a loop body (target: loop start) and at the end of an `if` body
    /// (target: behind the `else` body); followed by the 64-bit target index.
    Jump,
    /// Not emitted by the compiler in this file.
    JumpTrue,
    /// Emitted after the code of a loop or `if` condition; followed by the 64-bit target index.
    JumpFalse,

    /// Emitted for a print statement, after the code of its expression.
    Print,
    /// Emitted for a debug print statement, after the code of its expression.
    DbgPrint,
}
/// Translates an abstract syntax tree into a flat stream of `u32` words.
#[derive(Debug, Default)]
pub struct Compiler {
    /// The words generated so far: opcodes and their inline 64-bit operands.
    ops: Vec<u32>,
    /// Maps the name of each declared variable to the numeric slot id used in the bytecode.
    global_vars: HashMap<String, u64>,
}
impl Compiler {
pub fn new() -> Self {
Compiler::default()
}
pub fn compile(&mut self, ast: &Ast) {
for stmt in &ast.prog {
match stmt {
Stmt::Expr(expr) => {
self.compile_expr(expr);
self.ops.push(OP::Pop as OpcodeSize);
}
Stmt::Let(name, rhs) => {
let id = self.global_vars.len() as u64;
self.global_vars.insert(name.clone(), id);
self.compile_expr(rhs);
self.gen_store(id);
}
Stmt::While(cond, body) => {
let idx_start = self.ops.len();
self.compile_expr(cond);
self.ops.push(OP::JumpFalse as OpcodeSize);
let idx_jmp = self.ops.len();
self.gen_i64(0);
self.compile(body);
self.ops.push(OP::Jump as OpcodeSize);
self.gen_i64(idx_start as i64);
self.overwrite_i64(idx_jmp, self.ops.len() as i64);
}
Stmt::For(_, _, _, _) => todo!(),
Stmt::If(cond, if_block, else_block) => {
self.compile_expr(cond);
self.ops.push(OP::JumpFalse as OpcodeSize);
let idx_if = self.ops.len();
self.gen_i64(0);
self.compile(if_block);
self.ops.push(OP::Jump as OpcodeSize);
let idx_else = self.ops.len();
self.gen_i64(0);
self.overwrite_i64(idx_if, self.ops.len() as i64);
self.compile(else_block);
self.overwrite_i64(idx_else, self.ops.len() as i64);
},
Stmt::DbgPrint(expr) => {
self.compile_expr(expr);
self.ops.push(OP::DbgPrint as OpcodeSize);
}
Stmt::Print(expr) => {
self.compile_expr(expr);
self.ops.push(OP::Print as OpcodeSize);
}
}
}
}
pub fn into_ops(self) -> Vec<u32> {
self.ops
}
pub fn compile_expr(&mut self, expr: &Expr) {
match expr {
Expr::I64(val) => {
self.ops.push(OP::Push as OpcodeSize);
self.gen_i64(*val)
}
Expr::Ident(name) => {
match self.global_vars.get(name).copied() {
Some(addr) => self.gen_load(addr),
None => panic!("Variable '{}' used before declaration", name),
}
},
Expr::BinOp(bo, lhs, rhs) => self.compile_binop(bo, lhs, rhs),
Expr::UnOp(_, _) => todo!(),
Expr::Str(_) => todo!(),
}
}
fn compile_binop(&mut self, bo: &BinOpType, lhs: &Expr, rhs: &Expr) {
if matches!(bo, BinOpType::Assign) {
self.compile_expr(rhs);
if let Expr::Ident(name) = lhs {
let addr = *self.global_vars.get(name).expect("Trying to assign var before decl");
self.gen_store(addr);
} else {
panic!("Trying to assign value to rvalue");
}
return;
}
self.compile_expr(lhs);
self.compile_expr(rhs);
match bo {
BinOpType::Add => self.ops.push(OP::Add as OpcodeSize),
BinOpType::Sub => self.ops.push(OP::Subtract as OpcodeSize),
BinOpType::Mul => self.ops.push(OP::Multiply as OpcodeSize),
BinOpType::Div => self.ops.push(OP::Divide as OpcodeSize),
BinOpType::Mod => self.ops.push(OP::Modulo as OpcodeSize),
BinOpType::BOr => self.ops.push(OP::BOr as OpcodeSize),
BinOpType::BAnd => self.ops.push(OP::BAnd as OpcodeSize),
BinOpType::BXor => self.ops.push(OP::BXor as OpcodeSize),
BinOpType::Shl => self.ops.push(OP::Shl as OpcodeSize),
BinOpType::Shr => self.ops.push(OP::Shr as OpcodeSize),
BinOpType::Equ => self.ops.push(OP::Eq as OpcodeSize),
BinOpType::Neq => self.ops.push(OP::Neq as OpcodeSize),
BinOpType::Gt => self.ops.push(OP::Gt as OpcodeSize),
BinOpType::Ge => self.ops.push(OP::Ge as OpcodeSize),
BinOpType::Lt => self.ops.push(OP::Lt as OpcodeSize),
BinOpType::Le => self.ops.push(OP::Le as OpcodeSize),
BinOpType::Assign => unreachable!(),
}
}
fn gen_i64(&mut self, val: i64) {
// for i in 0 .. 8 {
// self.ops.push(((val >> i*8) & 0xff) as OpcodeSize);
// }
for i in 0 .. 2 {
self.ops.push(((val >> i*32) & 0xffffffff) as OpcodeSize);
}
}
fn overwrite_i64(&mut self, idx: usize, val: i64) {
// for i in 0 .. 8 {
// self.ops[idx+i] = ((val >> i*8) & 0xff) as OpcodeSize;
// }
for i in 0 .. 2 {
self.ops[idx+i] = ((val >> i*32) & 0xffffffff) as OpcodeSize;
}
}
fn gen_load(&mut self, addr: u64) {
self.ops.push(OP::Load as OpcodeSize);
self.gen_i64(addr as i64)
}
fn gen_store(&mut self, addr: u64) {
self.ops.push(OP::Store as OpcodeSize);
self.gen_i64(addr as i64)
}
}
/// Compile `ast` with a fresh [`Compiler`] and return the generated instruction stream.
pub fn compile(ast: &Ast) -> Vec<u32> {
    let mut codegen = Compiler::new();
    codegen.compile(ast);

    codegen.into_ops()
}

View File

@ -1,33 +1,132 @@
use crate::parser::{Ast, BinOpType};
use std::{collections::HashMap, fmt::Display, rc::Rc};
use crate::{
ast::{Ast, BinOpType, Expr, Stmt, UnOpType},
lexer::lex,
parser::parse,
};
/// A runtime value produced by evaluating an expression.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Value {
    /// 64-bit signed integer.
    I64(i64),
    /// Text, held behind a reference-counted pointer.
    Str(Rc<String>),
}
pub struct Interpreter {
// Runtime storage, for example variables ...
/// The variable table maps all variables by their names to their values
vartable: HashMap<String, Value>,
}
impl Interpreter {
pub fn new() -> Self {
Self {}
let vartable = HashMap::new();
Self { vartable }
}
pub fn run(&mut self, prog: Ast) {
let result = self.resolve_expr(prog);
println!("Result = {:?}", result);
pub fn run_text(&mut self, code: &str, print_tokens: bool, print_ast: bool) {
let tokens = lex(code);
if print_tokens {
println!("Tokens: {:?}", tokens);
}
let ast = parse(tokens);
if print_ast {
println!("Ast:\n{:#?}", ast);
}
self.run(&ast);
}
fn resolve_expr(&mut self, expr: Ast) -> Value {
pub fn run(&mut self, prog: &Ast) {
for stmt in &prog.prog {
match stmt {
Stmt::Expr(expr) => {
self.resolve_expr(expr);
}
Stmt::DbgPrint(expr) => {
let result = self.resolve_expr(expr);
println!("{:?}", result);
}
Stmt::Print(expr) => {
let result = self.resolve_expr(expr);
print!("{}", result);
}
Stmt::Let(name, rhs) => {
let result = self.resolve_expr(rhs);
self.vartable.insert(name.clone(), result);
}
Stmt::For(init, condition, advance, body) => {
// Execute initital let instruction
let init_val = self.resolve_expr(&init.1);
self.vartable.insert(init.0.clone(), init_val);
loop {
// Check condition
match self.resolve_expr(condition) {
Value::I64(val) if val == 0 => break,
Value::I64(_) => (),
Value::Str(text) if text.is_empty() => break,
Value::Str(_) => (),
}
// Execute loop body
self.run(body);
// Execute advancement
self.resolve_expr(advance);
}
}
Stmt::While(condition, body) => {
loop {
// Check condition
match self.resolve_expr(condition) {
Value::I64(val) if val == 0 => break,
Value::I64(_) => (),
Value::Str(text) if text.is_empty() => break,
Value::Str(_) => (),
}
// Execute loop body
self.run(body);
}
}
Stmt::If(condition, body_if, body_else) => {
if matches!(self.resolve_expr(condition), Value::I64(0)) {
self.run(body_else);
} else {
self.run(body_if);
}
}
}
}
}
fn resolve_expr(&mut self, expr: &Expr) -> Value {
match expr {
Ast::I64(val) => Value::I64(val),
Ast::BinOp(bo, lhs, rhs) => self.resolve_binop(bo, *lhs, *rhs),
Expr::I64(val) => Value::I64(*val),
Expr::Str(name) => Value::Str(name.clone()),
Expr::BinOp(bo, lhs, rhs) => self.resolve_binop(bo, &lhs, &rhs),
Expr::UnOp(uo, val) => self.resolve_unop(uo, &val),
Expr::Ident(name) => match self.vartable.get(name) {
None => panic!("Runtime error: Use of undeclared variable '{}'", name),
Some(val) => val.clone(),
},
}
}
fn resolve_binop(&mut self, bo: &BinOpType, lhs: &Expr, rhs: &Expr) -> Value {
// Treat assignment separate from the other expressions
if matches!(bo, BinOpType::Assign) {
match lhs {
Expr::Ident(name) => {
let rhs = self.resolve_expr(rhs);
self.vartable.get_mut(name).map(|var| *var = rhs.clone());
return rhs;
}
_ => panic!("Runtime error: Left hand side of assignment must be an identifier"),
}
}
fn resolve_binop(&mut self, bo: BinOpType, lhs: Ast, rhs: Ast) -> Value {
let lhs = self.resolve_expr(lhs);
let rhs = self.resolve_expr(rhs);
@ -35,36 +134,70 @@ impl Interpreter {
(Value::I64(lhs), Value::I64(rhs)) => match bo {
BinOpType::Add => Value::I64(lhs + rhs),
BinOpType::Mul => Value::I64(lhs * rhs),
BinOpType::Sub => Value::I64(lhs - rhs),
BinOpType::Div => Value::I64(lhs / rhs),
BinOpType::Mod => Value::I64(lhs % rhs),
BinOpType::BOr => Value::I64(lhs | rhs),
BinOpType::BAnd => Value::I64(lhs & rhs),
BinOpType::BXor => Value::I64(lhs ^ rhs),
BinOpType::Shr => Value::I64(lhs >> rhs),
BinOpType::Shl => Value::I64(lhs << rhs),
BinOpType::Equ => Value::I64(if lhs == rhs { 1 } else { 0 }),
BinOpType::Neq => Value::I64(if lhs != rhs { 1 } else { 0 }),
BinOpType::Gt => Value::I64(if lhs > rhs { 1 } else { 0 }),
BinOpType::Ge => Value::I64(if lhs >= rhs { 1 } else { 0 }),
BinOpType::Lt => Value::I64(if lhs < rhs { 1 } else { 0 }),
BinOpType::Le => Value::I64(if lhs <= rhs { 1 } else { 0 }),
BinOpType::Assign => unreachable!(),
},
// _ => panic!("Value types are not compatible"),
_ => panic!("Value types are not compatible"),
}
}
/// Evaluate the operand `val` and apply the unary operator `uo` to the result.
///
/// # Panics
/// Panics if the operand does not evaluate to an integer.
fn resolve_unop(&mut self, uo: &UnOpType, val: &Expr) -> Value {
    // All unary operators currently require an integer operand
    let operand = match self.resolve_expr(val) {
        Value::I64(num) => num,
        _ => panic!("Invalid unary operation for type"),
    };

    match uo {
        UnOpType::Neg => Value::I64(-operand),
    }
}
}
/// Plain formatting of a value: the number itself for integers, the raw text for strings.
impl Display for Value {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Value::I64(num) => f.write_fmt(format_args!("{}", num)),
            Value::Str(text) => f.write_fmt(format_args!("{}", text)),
        }
    }
}
#[cfg(test)]
mod test {
use crate::parser::{Ast, BinOpType};
use super::{Interpreter, Value};
use crate::ast::{BinOpType, Expr};
#[test]
fn test_interpreter_expr() {
// Expression: 1 + 2 * 3 + 4
// With precedence: (1 + (2 * 3)) + 4
let ast = Ast::BinOp(
let ast = Expr::BinOp(
BinOpType::Add,
Ast::BinOp(
Expr::BinOp(
BinOpType::Add,
Ast::I64(1).into(),
Ast::BinOp(BinOpType::Mul, Ast::I64(2).into(), Ast::I64(3).into()).into(),
Expr::I64(1).into(),
Expr::BinOp(BinOpType::Mul, Expr::I64(2).into(), Expr::I64(3).into()).into(),
)
.into(),
Ast::I64(4).into(),
Expr::I64(4).into(),
);
let expected = Value::I64(11);
let mut interpreter = Interpreter::new();
let actual = interpreter.resolve_expr(ast);
let actual = interpreter.resolve_expr(&ast);
assert_eq!(expected, actual);
}

View File

@ -1,20 +1,11 @@
use std::{iter::Peekable, str::Chars};
use crate::parser::BinOpType;
use crate::token::{Keyword, Literal, Token};
#[derive(Debug, PartialEq, Eq)]
pub enum Token {
/// Integer literal (64-bit)
I64(i64),
/// Plus (+)
Add,
/// Asterisk (*)
Mul,
/// End of file
EoF,
/// Turn the source text `code` into a buffer of tokens.
pub fn lex(code: &str) -> Vec<Token> {
    Lexer::new(code).lex()
}
struct Lexer<'a> {
@ -27,87 +18,218 @@ impl<'a> Lexer<'a> {
Self { code }
}
/// Consume the next character and return it. When the input is exhausted, '\0' is returned
/// instead.
fn next(&mut self) -> char {
    match self.code.next() {
        Some(ch) => ch,
        None => '\0',
    }
}
/// Look at the next character without consuming it. When the input is exhausted, '\0' is
/// returned instead.
fn peek(&mut self) -> char {
    match self.code.peek() {
        Some(&ch) => ch,
        None => '\0',
    }
}
fn lex(&mut self) -> Vec<Token> {
let mut tokens = Vec::new();
while let Some(ch) = self.next() {
match ch {
loop {
match self.next() {
// End of text
'\0' => break,
// Skip whitespace
' ' => (),
' ' | '\r' | '\n' | '\t' => (),
// Handle tokens that span two characters
'>' if matches!(self.peek(), '>') => {
self.next();
tokens.push(Token::Shr);
}
'<' if matches!(self.peek(), '<') => {
self.next();
tokens.push(Token::Shl);
}
'=' if matches!(self.peek(), '=') => {
self.next();
tokens.push(Token::Equ);
}
'!' if matches!(self.peek(), '=') => {
self.next();
tokens.push(Token::Neq);
}
'<' if matches!(self.peek(), '=') => {
self.next();
tokens.push(Token::Le);
}
'>' if matches!(self.peek(), '=') => {
self.next();
tokens.push(Token::Ge);
}
'$' if matches!(self.peek(), '$') => {
self.next();
tokens.push(Token::DoubleDollar);
}
// Handle tokens that span one character
'+' => tokens.push(Token::Add),
'-' => tokens.push(Token::Sub),
'*' => tokens.push(Token::Mul),
'/' => tokens.push(Token::Div),
'%' => tokens.push(Token::Mod),
'|' => tokens.push(Token::BOr),
'&' => tokens.push(Token::BAnd),
'^' => tokens.push(Token::BXor),
'(' => tokens.push(Token::LParen),
')' => tokens.push(Token::RParen),
'<' => tokens.push(Token::Lt),
'>' => tokens.push(Token::Gt),
'=' => tokens.push(Token::Assign),
';' => tokens.push(Token::Semicolon),
'{' => tokens.push(Token::LBrace),
'}' => tokens.push(Token::RBrace),
'$' => tokens.push(Token::Dollar),
// Handle special multicharacter tokens
// Lex numbers
'0'..='9' => {
let mut sval = String::from(ch);
ch @ '0'..='9' => tokens.push(self.lex_number(ch)),
// Do as long as a next char exists and it is a numeric char
while let Some('0'..='9') = self.peek() {
// The next char is verified to be Some, so unwrap is safe
sval.push(self.next().unwrap());
}
// Lex strings
'"' => tokens.push(self.lex_string()),
// TODO: We only added numeric chars to the string, but the conversion could still fail
tokens.push(Token::I64(sval.parse().unwrap()));
}
// Lex identifiers
ch @ ('a'..='z' | 'A'..='Z' | '_') => tokens.push(self.lex_ident(ch)),
'+' => tokens.push(Token::Add),
'*' => tokens.push(Token::Mul),
//TODO: Don't panic, keep calm
_ => panic!("Lexer encountered unexpected char: '{}'", ch),
// Any other character is unexpected
ch => panic!("Lexer encountered unexpected char: '{}'", ch),
}
}
tokens
}
/// Advance to next character and return the removed char
fn next(&mut self) -> Option<char> {
self.code.next()
fn lex_number(&mut self, first_char: char) -> Token {
let mut sval = String::from(first_char);
// Do as long as a next char exists and it is a numeric char
loop {
// `peek` returns '\0' at the end of the text, which ends the number in the last arm below
match self.peek() {
// Underscore is a separator, so remove it but don't add to number
'_' => {
self.next();
}
'0'..='9' => {
sval.push(self.next());
}
// Next char is not a number, so stop and finish the number token
_ => break,
}
}
/// Get the next character without removing it
fn peek(&mut self) -> Option<char> {
self.code.peek().copied()
// TODO: We only added numeric chars to the string, but the conversion could still fail
Token::Literal(Literal::I64(sval.parse().unwrap()))
}
}
/// Lex the provided code into a Token Buffer
///
/// TODO: Don't panic and implement error handling using Result
pub fn lex(code: &str) -> Vec<Token> {
let mut lexer = Lexer::new(code);
lexer.lex()
}
/// Lex an identifier from the character stream. The first char has to have been consumed
/// from the stream already and is passed as an argument instead.
fn lex_ident(&mut self, first_char: char) -> Token {
let mut ident = String::from(first_char);
impl Token {
pub fn try_to_binop(&self) -> Option<BinOpType> {
Some(match self {
Token::Add => BinOpType::Add,
Token::Mul => BinOpType::Mul,
_ => return None,
})
// Do as long as a next char exists and it is a valid ident char
while let 'a'..='z' | 'A'..='Z' | '_' | '0'..='9' = self.peek() {
// The char was just matched by `peek`, so `next` returns that same ident char
ident.push(self.next());
}
// Check if the identifier is a keyword
match ident.as_str() {
"true" => Token::Literal(Literal::I64(1)),
"false" => Token::Literal(Literal::I64(0)),
"let" => Token::Keyword(Keyword::Let),
"while" => Token::Keyword(Keyword::While),
"if" => Token::Keyword(Keyword::If),
"else" => Token::Keyword(Keyword::Else),
"for" => Token::Keyword(Keyword::For),
_ => Token::Ident(ident),
}
}
/// Lex a string token from the character stream. This requires the initial quote '"' to be
/// consumed before.
///
/// Supported escape sequences are `\\`, `\"`, `\n`, `\r` and `\t`.
///
/// # Panics
/// Panics on an unknown escape sequence and when the text ends before the closing
/// doublequote '"'.
fn lex_string(&mut self) -> Token {
    let mut text = String::new();

    loop {
        match self.next() {
            // An unescaped doublequote '"' ends the string
            '"' => break,

            // Backslash '\' escapes the next character
            '\\' => match self.next() {
                '\\' => text.push('\\'),
                // Without this escape a string could never contain a doublequote
                '"' => text.push('"'),
                'n' => text.push('\n'),
                'r' => text.push('\r'),
                't' => text.push('\t'),
                ch => panic!("Invalid string escape: '{:?}'", ch),
            },

            // Reached end of text but didn't encounter closing doublequote '"'
            '\0' => panic!("String is never terminated (missing '\"')"),

            ch => text.push(ch),
        }
    }

    Token::Literal(Literal::Str(text))
}
}
#[cfg(test)]
mod tests {
use crate::token::Literal;
use super::{lex, Token};
#[test]
fn test_lexer() {
let code = "33 +5*2 + 4456467*2334+3";
let code = "33 +5*2 + 4456467*2334+3 % - / << ^ | & >>";
let expected = vec![
Token::I64(33),
Token::Literal(Literal::I64(33)),
Token::Add,
Token::I64(5),
Token::Literal(Literal::I64(5)),
Token::Mul,
Token::I64(2),
Token::Literal(Literal::I64(2)),
Token::Add,
Token::I64(4456467),
Token::Literal(Literal::I64(4456467)),
Token::Mul,
Token::I64(2334),
Token::Literal(Literal::I64(2334)),
Token::Add,
Token::I64(3),
Token::Literal(Literal::I64(3)),
Token::Mod,
Token::Sub,
Token::Div,
Token::Shl,
Token::BXor,
Token::BOr,
Token::BAnd,
Token::Shr,
];
let actual = lex(code);

View File

@ -1,3 +1,7 @@
pub mod lexer;
pub mod parser;
pub mod interpreter;
pub mod token;
pub mod ast;
pub mod bytecode;
pub mod vm;

View File

@ -1,23 +1,63 @@
use nek_lang::{lexer::lex, parser::parse, interpreter::Interpreter};
use std::{env::args, io::Write};
use nek_lang::{interpreter::Interpreter, lexer::lex, parser::parse, bytecode::compile, vm::Vm};
/// Options collected from the command line arguments.
#[derive(Debug, Default)]
struct CliConfig {
    /// Print the lexed tokens (`--tokens` / `-t`).
    print_tokens: bool,
    /// Print the parsed syntax tree (`--ast` / `-a`).
    print_ast: bool,
    /// Start the interactive prompt (`--interactive` / `-i`).
    interactive: bool,
    /// Path of the source file to run, if one was given.
    file: Option<String>,
}
fn main() {
let mut cfg = CliConfig::default();
let mut code = String::new();
std::io::stdin().read_line(&mut code).unwrap();
let code = code.trim();
let tokens = lex(&code);
println!("Tokens: {:?}\n", tokens);
let ast = parse(tokens);
println!("Ast: {:#?}\n", ast);
for arg in args().skip(1) {
match arg.as_str() {
"--tokens" | "-t" => cfg.print_tokens = true,
"--ast" | "-a" => cfg.print_ast = true,
"--interactive" | "-i" => cfg.interactive = true,
file if cfg.file.is_none() => cfg.file = Some(file.to_string()),
_ => panic!("Invalid argument: '{}'", arg),
}
}
let mut interpreter = Interpreter::new();
interpreter.run(ast);
if let Some(file) = &cfg.file {
let code = std::fs::read_to_string(file).expect(&format!("File not found: '{}'", file));
let tokens = lex(&code);
let ast = parse(tokens);
let prog = compile(&ast);
// println!("{:?}", prog);
let mut vm = Vm::new(prog);
vm.run();
// interpreter.run_text(&code, cfg.print_tokens, cfg.print_ast);
}
if cfg.interactive || cfg.file.is_none() {
let mut code = String::new();
loop {
print!(">> ");
std::io::stdout().flush().unwrap();
code.clear();
std::io::stdin().read_line(&mut code).unwrap();
let code = code.trim();
if code == "exit" {
break;
}
interpreter.run_text(&code, cfg.print_tokens, cfg.print_ast);
}
}
}

View File

@ -1,24 +1,9 @@
use std::iter::Peekable;
use crate::lexer::Token;
/// Types for binary operators
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum BinOpType {
/// Addition
Add,
/// Multiplication
Mul,
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Ast {
/// Integer literal (64-bit)
I64(i64),
/// Binary operation. Consists of type, left hand side and right hand side
BinOp(BinOpType, Box<Ast>, Box<Ast>),
}
use crate::{
ast::{Ast, BinOpType, Expr, Stmt, UnOpType},
token::{Keyword, Literal, Token},
};
struct Parser<T: Iterator<Item = Token>> {
tokens: Peekable<T>,
@ -31,18 +16,173 @@ impl<T: Iterator<Item = Token>> Parser<T> {
Self { tokens }
}
fn parse(&mut self) -> Ast {
self.parse_expr()
/// Look at the next token without consuming it. Once the tokens are exhausted, `Token::EoF`
/// is returned.
fn peek(&mut self) -> &Token {
    match self.tokens.peek() {
        Some(token) => token,
        None => &Token::EoF,
    }
}
fn parse_expr(&mut self) -> Ast {
/// Consume the next token and return it. Once the tokens are exhausted, `Token::EoF` is
/// returned.
fn next(&mut self) -> Token {
    match self.tokens.next() {
        Some(token) => token,
        None => Token::EoF,
    }
}
/// Parse a sequence of statements. Parsing stops at the end of the tokens or in front of a
/// closing brace, which is left for the caller to consume. Semicolons between statements
/// are skipped.
fn parse(&mut self) -> Ast {
    let mut statements = Vec::new();

    loop {
        let stmt = match self.peek() {
            // Statement separator, nothing to parse
            Token::Semicolon => {
                self.next();
                continue;
            }

            // End of the program or of the current block
            Token::EoF | Token::RBrace => break,

            Token::Keyword(Keyword::Let) => self.parse_let_stmt(),
            Token::Keyword(Keyword::While) => self.parse_while(),
            Token::Keyword(Keyword::If) => self.parse_if(),
            Token::Keyword(Keyword::For) => self.parse_for(),
            Token::Keyword(Keyword::Else) => panic!("Unexpected else keyword"),

            Token::Dollar => {
                self.next();
                Stmt::Print(self.parse_expr())
            }

            Token::DoubleDollar => {
                self.next();
                Stmt::DbgPrint(self.parse_expr())
            }

            // Everything else is tried as an expression statement
            _ => Stmt::Expr(self.parse_expr()),
        };

        statements.push(stmt);
    }

    Ast { prog: statements }
}
/// Parse a for loop: `"for" stmt_let ";" expr ";" expr "{" (stmt)* "}"`.
///
/// # Panics
/// Panics if one of the expected tokens is missing.
fn parse_for(&mut self) -> Stmt {
    assert!(
        matches!(self.next(), Token::Keyword(Keyword::For)),
        "Error parsing for: Expected for token"
    );

    // The initialization reuses the let statement parser and is unpacked into its parts
    let init = match self.parse_let_stmt() {
        Stmt::Let(name, rhs) => (name, rhs),
        _ => unreachable!(),
    };

    assert!(
        matches!(self.next(), Token::Semicolon),
        "Error parsing for: Expected semicolon token"
    );

    let condition = self.parse_expr();

    assert!(
        matches!(self.next(), Token::Semicolon),
        "Error parsing for: Expected semicolon token"
    );

    let advance = self.parse_expr();

    assert!(
        matches!(self.next(), Token::LBrace),
        "Error parsing for: Expected '{{' token"
    );

    let body = self.parse();

    assert!(
        matches!(self.next(), Token::RBrace),
        "Error parsing for: Expected '}}' token"
    );

    Stmt::For(init, condition, advance, body)
}
/// Parse an if statement with an optional else branch. Without an else branch the false body
/// is an empty block.
///
/// # Panics
/// Panics if one of the expected tokens is missing.
fn parse_if(&mut self) -> Stmt {
    assert!(
        matches!(self.next(), Token::Keyword(Keyword::If)),
        "Error parsing if: Expected if token"
    );

    let condition = self.parse_expr();

    assert!(
        matches!(self.next(), Token::LBrace),
        "Error parsing if: Expected '{{' token"
    );

    let body_if = self.parse();

    assert!(
        matches!(self.next(), Token::RBrace),
        "Error parsing if: Expected '}}' token"
    );

    let body_else = if matches!(self.peek(), Token::Keyword(Keyword::Else)) {
        // Consume the else keyword
        self.next();

        assert!(
            matches!(self.next(), Token::LBrace),
            "Error parsing else: Expected '{{' token"
        );

        let block = self.parse();

        assert!(
            matches!(self.next(), Token::RBrace),
            "Error parsing else: Expected '}}' token"
        );

        block
    } else {
        Ast { prog: Vec::new() }
    };

    Stmt::If(condition, body_if, body_else)
}
/// Parse a while loop: `"while" expr "{" (stmt)* "}"`.
///
/// # Panics
/// Panics if one of the expected tokens is missing.
fn parse_while(&mut self) -> Stmt {
    assert!(
        matches!(self.next(), Token::Keyword(Keyword::While)),
        "Error parsing while: Expected while token"
    );

    let condition = self.parse_expr();

    assert!(
        matches!(self.next(), Token::LBrace),
        "Error parsing while: Expected '{{' token"
    );

    let body = self.parse();

    assert!(
        matches!(self.next(), Token::RBrace),
        "Error parsing while: Expected '}}' token"
    );

    Stmt::While(condition, body)
}
/// Parse a variable declaration: `"let" IDENT "=" expr`.
///
/// # Panics
/// Panics if the let keyword, the identifier or the assignment token is missing.
fn parse_let_stmt(&mut self) -> Stmt {
    assert!(
        matches!(self.next(), Token::Keyword(Keyword::Let)),
        "Error parsing let: Expected let token"
    );

    let name = match self.next() {
        Token::Ident(ident) => ident,
        _ => panic!("Error parsing let: Expected identifier after let"),
    };

    assert!(
        matches!(self.next(), Token::Assign),
        "Error parsing let: Expected assignment token"
    );

    Stmt::Let(name, self.parse_expr())
}
/// Parse a full expression: a primary expression followed by any binary operators, starting
/// at the lowest precedence.
fn parse_expr(&mut self) -> Expr {
    let first = self.parse_primary();

    self.parse_expr_precedence(first, 0)
}
/// Parse binary expressions with a precedence equal to or higher than min_prec
fn parse_expr_precedence(&mut self, mut lhs: Ast, min_prec: u8) -> Ast {
fn parse_expr_precedence(&mut self, mut lhs: Expr, min_prec: u8) -> Expr {
while let Some(binop) = &self.peek().try_to_binop() {
// Stop if the next operator has a lower binding power
if !(binop.precedence() >= min_prec) {
break;
}
@ -61,30 +201,40 @@ impl<T: Iterator<Item = Token>> Parser<T> {
rhs = self.parse_expr_precedence(rhs, binop.precedence() + 1);
}
lhs = Ast::BinOp(binop, lhs.into(), rhs.into());
lhs = Expr::BinOp(binop, lhs.into(), rhs.into());
}
lhs
}
/// Parse a primary expression (for now only number)
fn parse_primary(&mut self) -> Ast {
fn parse_primary(&mut self) -> Expr {
match self.next() {
Token::I64(val) => Ast::I64(val),
Token::Literal(Literal::I64(val)) => Expr::I64(val),
Token::Literal(Literal::Str(text)) => Expr::Str(text.into()),
Token::Ident(name) => Expr::Ident(name),
Token::LParen => {
// The tokens was an opening parenthesis, so parse a full expression again as the
// expression inside the parentheses `"(" expr ")"`
let inner = self.parse_expr();
// If there is no closing parenthesis after the expression, it is a syntax error
if !matches!(self.next(), Token::RParen) {
panic!("Error parsing primary expr: Missing closing parenthesis ')'");
}
inner
}
Token::Sub => Expr::UnOp(UnOpType::Neg, self.parse_primary().into()),
tok => panic!("Error parsing primary expr: Unexpected Token '{:?}'", tok),
}
}
/// Get the next Token without removing it
fn peek(&mut self) -> &Token {
self.tokens.peek().unwrap_or(&Token::EoF)
}
/// Advance to next Token and return the removed Token
fn next(&mut self) -> Token {
self.tokens.next().unwrap_or(Token::EoF)
}
}
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
@ -95,44 +245,62 @@ pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A
impl BinOpType {
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
///
/// The operator precedences are derived from the C language operator precedences. While not all
/// C operators are included or the exact same, the precedence order is the same.
/// See: https://en.cppreference.com/w/c/language/operator_precedence
fn precedence(&self) -> u8 {
match self {
BinOpType::Add => 0,
BinOpType::Mul => 1,
BinOpType::Assign => 0,
BinOpType::BOr => 1,
BinOpType::BXor => 2,
BinOpType::BAnd => 3,
BinOpType::Equ | BinOpType::Neq => 4,
BinOpType::Gt | BinOpType::Ge | BinOpType::Lt | BinOpType::Le => 5,
BinOpType::Shl | BinOpType::Shr => 6,
BinOpType::Add | BinOpType::Sub => 7,
BinOpType::Mul | BinOpType::Div | BinOpType::Mod => 8,
}
}
}
#[cfg(test)]
mod tests {
use super::{parse, Ast, BinOpType};
use crate::lexer::Token;
use super::{parse, BinOpType, Expr};
use crate::{
parser::{Ast, Stmt},
token::{Literal, Token},
};
#[test]
fn test_parser() {
// Expression: 1 + 2 * 3 + 4
// With precedence: (1 + (2 * 3)) + 4
let tokens = [
Token::I64(1),
Token::Literal(Literal::I64(1)),
Token::Add,
Token::I64(2),
Token::Literal(Literal::I64(2)),
Token::Mul,
Token::I64(3),
Token::Add,
Token::I64(4),
Token::Literal(Literal::I64(3)),
Token::Sub,
Token::Literal(Literal::I64(4)),
];
let expected = Ast::BinOp(
let expected = Expr::BinOp(
BinOpType::Sub,
Expr::BinOp(
BinOpType::Add,
Ast::BinOp(
BinOpType::Add,
Ast::I64(1).into(),
Ast::BinOp(BinOpType::Mul, Ast::I64(2).into(), Ast::I64(3).into()).into(),
Expr::I64(1).into(),
Expr::BinOp(BinOpType::Mul, Expr::I64(2).into(), Expr::I64(3).into()).into(),
)
.into(),
Ast::I64(4).into(),
Expr::I64(4).into(),
);
let expected = Ast {
prog: vec![Stmt::Expr(expected)],
};
let actual = parse(tokens);
assert_eq!(expected, actual);
}

147
src/token.rs Normal file
View File

@ -0,0 +1,147 @@
use crate::ast::BinOpType;
/// Literal value carried by a `Token::Literal`
#[derive(Debug, PartialEq, Eq)]
pub enum Literal {
/// Integer literal (64-bit signed)
I64(i64),
/// String literal ("Some string")
Str(String),
}
/// Reserved words carried by a `Token::Keyword`
#[derive(Debug, PartialEq, Eq)]
pub enum Keyword {
/// The `let` keyword
Let,
/// The `while` keyword
While,
/// The `for` keyword
For,
/// The `if` keyword
If,
/// The `else` keyword
Else,
}
/// A single token. Operator tokens can be converted into their binary operator with
/// `Token::try_to_binop`.
#[derive(Debug, PartialEq, Eq)]
pub enum Token {
/// Literal values
Literal(Literal),
/// Identifier (variable / function / ... name)
Ident(String),
/// Specific identifiers that have a special meaning as keywords
Keyword(Keyword),
/// Left parenthesis ('(')
LParen,
/// Right parenthesis (')')
RParen,
/// Left brace ('{')
LBrace,
/// Right brace ('}')
RBrace,
/// Dollar sign ($)
Dollar,
/// Double Dollar sign ($$)
DoubleDollar,
/// Assignment (single equal) (=)
Assign,
/// Plus (+)
Add,
/// Minus (-)
Sub,
/// Asterisk (*)
Mul,
/// Slash (/)
Div,
/// Percent (%)
Mod,
/// Pipe (|)
BOr,
/// Ampersand (&)
BAnd,
/// Circumflex (^)
BXor,
/// Shift Left (<<)
Shl,
/// Shift Right (>>)
Shr,
/// Equal (double equal sign) (==)
Equ,
/// Not Equal (!=)
Neq,
/// Greater than (>)
Gt,
/// Greater or equal (>=)
Ge,
/// Less than (<)
Lt,
/// Less or equal (<=)
Le,
/// Semicolon (;)
Semicolon,
/// End of file
EoF,
}
impl Token {
    /// Translate an operator token into the binary operator it denotes.
    ///
    /// Yields `None` for every token that is not a binary operator.
    pub fn try_to_binop(&self) -> Option<BinOpType> {
        let binop = match self {
            // Assignment
            Token::Assign => BinOpType::Assign,

            // Arithmetic
            Token::Add => BinOpType::Add,
            Token::Sub => BinOpType::Sub,
            Token::Mul => BinOpType::Mul,
            Token::Div => BinOpType::Div,
            Token::Mod => BinOpType::Mod,

            // Bitwise and shifts
            Token::BOr => BinOpType::BOr,
            Token::BAnd => BinOpType::BAnd,
            Token::BXor => BinOpType::BXor,
            Token::Shl => BinOpType::Shl,
            Token::Shr => BinOpType::Shr,

            // Comparisons
            Token::Equ => BinOpType::Equ,
            Token::Neq => BinOpType::Neq,
            Token::Gt => BinOpType::Gt,
            Token::Ge => BinOpType::Ge,
            Token::Lt => BinOpType::Lt,
            Token::Le => BinOpType::Le,

            // Anything else is not a binary operator
            _ => return None,
        };

        Some(binop)
    }
}

187
src/vm.rs Normal file
View File

@ -0,0 +1,187 @@
use crate::{bytecode::OP, interpreter::Value};
/// Stack-based virtual machine that executes a program encoded as 32-bit words
#[derive(Debug, Default)]
pub struct Vm {
/// The program: opcode words, some followed by a 64-bit operand stored as two words
prog: Vec<u32>,
/// Index into `prog` of the next word to read
ip: usize,
/// Operand stack that instructions push to and pop from
stack: Vec<Value>,
/// Storage addressed by the operand of `Load` / `Store`. Despite the name this is not a real
/// heap: a slot can only be appended directly at the end or overwritten in place
heap: Vec<Value>,
}
impl Vm {
pub fn new(prog: Vec<u32>) -> Self {
Self {
prog,
..Default::default()
}
}
pub fn run(&mut self) {
while let Some(op) = self.prog.get(self.ip).copied().map(|op| unsafe { std::mem::transmute::<u32, OP>(op) }) {
self.ip += 1;
match op {
OP::Push => {
let val = self.read_i64();
self.stack.push(Value::I64(val));
}
OP::Pop => {
self.stack.pop();
}
OP::Load => {
let addr = self.read_i64() as usize;
if let Some(val) = self.heap.get(addr) {
self.stack.push(val.clone());
} else {
panic!("Trying to load from uninitialized heap");
}
}
OP::Store => {
let val = self
.stack
.pop()
.expect("Trying to pop value from stack for storing");
let addr = self.read_i64() as usize;
if self.heap.len() == addr {
self.heap.push(val);
} else {
self.heap[addr] = val;
}
}
OP::Print => {
let val = self
.stack
.pop()
.expect("Trying to pop value from stack for printing");
print!("{}", val);
}
OP::DbgPrint => {
let val = self
.stack
.pop()
.expect("Trying to pop value from stack for printing");
print!("{:?}", val);
}
OP::Add => {
let vals = self.pop2_i64();
self.stack.push(Value::I64(vals.0 + vals.1))
}
OP::Subtract => {
let vals = self.pop2_i64();
self.stack.push(Value::I64(vals.0 - vals.1))
}
OP::Multiply => {
let vals = self.pop2_i64();
self.stack.push(Value::I64(vals.0 * vals.1))
}
OP::Divide => {
let vals = self.pop2_i64();
self.stack.push(Value::I64(vals.0 / vals.1))
}
OP::Modulo => {
let vals = self.pop2_i64();
self.stack.push(Value::I64(vals.0 % vals.1))
}
OP::Eq => {
let vals = self.pop2_i64();
self.stack
.push(Value::I64(if vals.0 == vals.1 { 1 } else { 0 }))
}
OP::Neq => {
let vals = self.pop2_i64();
self.stack
.push(Value::I64(if vals.0 != vals.1 { 1 } else { 0 }))
}
OP::Gt => {
let vals = self.pop2_i64();
self.stack
.push(Value::I64(if vals.0 > vals.1 { 1 } else { 0 }))
}
OP::Ge => {
let vals = self.pop2_i64();
self.stack
.push(Value::I64(if vals.0 >= vals.1 { 1 } else { 0 }))
}
OP::Lt => {
let vals = self.pop2_i64();
self.stack
.push(Value::I64(if vals.0 < vals.1 { 1 } else { 0 }))
}
OP::Le => {
let vals = self.pop2_i64();
self.stack
.push(Value::I64(if vals.0 <= vals.1 { 1 } else { 0 }))
}
OP::BOr => {
let vals = self.pop2_i64();
self.stack.push(Value::I64(vals.0 | vals.1))
}
OP::BAnd => {
let vals = self.pop2_i64();
self.stack.push(Value::I64(vals.0 & vals.1))
}
OP::BXor => {
let vals = self.pop2_i64();
self.stack.push(Value::I64(vals.0 ^ vals.1))
}
OP::Shl => {
let vals = self.pop2_i64();
self.stack.push(Value::I64(vals.0 << vals.1))
}
OP::Shr => {
let vals = self.pop2_i64();
self.stack.push(Value::I64(vals.0 >> vals.1))
}
OP::Jump => {
self.ip = self.read_i64() as usize;
}
OP::JumpTrue => {
let jmp_target = self.read_i64() as usize;
if !matches!(self.stack.pop(), Some(Value::I64(0))) {
self.ip = jmp_target;
}
}
OP::JumpFalse => {
let jmp_target = self.read_i64() as usize;
if matches!(self.stack.pop(), Some(Value::I64(0))) {
self.ip = jmp_target;
}
}
}
}
}
fn pop2_i64(&mut self) -> (i64, i64) {
let rhs = self.stack.pop();
let lhs = self.stack.pop();
match (lhs, rhs) {
(Some(Value::I64(lhs)), Some(Value::I64(rhs))) => (lhs, rhs),
_ => panic!("Invalid data for add"),
}
}
fn read_i64(&mut self) -> i64 {
let mut val = *self.prog.get(self.ip).unwrap() as i64;
val |= (*self.prog.get(self.ip + 1).unwrap() as i64) << 32;
// let mut bytes = [0; 8];
// bytes.copy_from_slice(&self.prog[self.ip..self.ip+8]);
// val = i64::from_le_bytes(bytes);
// for i in 0 .. 8 {
// if let Some(tmp) = self.prog.get(self.ip + i).copied() {
// val |= ((tmp as i64) << i*8) as i64;
// } else {
// panic!("Expected Value as next OP")
// }
// }
self.ip += 2;
val
}
}