41 Commits

Author SHA1 Message Date
21228ff3d7 Implement vec based scopes
- Replaced vartable hashmap with vec
- Use linear search in reverse to find the variables by name
- This is really fast with a small number of variables but tanks fast
  with more vars due to O(n) lookup times
- Implemented scopes by dropping all elements from the vartable at the
  end of a scope
2022-02-03 22:09:58 +01:00
588b3b5b2c Autoformat 2022-02-03 17:38:25 +01:00
f6152670aa Small refactor for lexer 2022-02-03 17:25:55 +01:00
c2b9ee71b8 Add project euler example 5 2022-02-03 16:16:38 +01:00
f8e5bd7423 Add comments to parser 2022-02-03 16:01:33 +01:00
d7001a5c52 Refactor, Comments, Bugfix for lexer
- Small refactoring in the lexer
- Added some more comments to the lexer
- Fixed endless loop when encountering comment in last line
2022-02-03 00:44:48 +01:00
bc68d9fa49 Add Result + Err to lexer 2022-02-02 21:59:46 +01:00
264d8f92f4 Update README 2022-02-02 19:40:10 +01:00
d8f5b876ac Implement String Literals
- String literals can be stored in variables, but are fully immutable
  and are not compatible with any operators
2022-02-02 19:38:28 +01:00
8cf6177cbc Update README 2022-02-02 19:15:20 +01:00
39bd4400b4 Implement logical not 2022-02-02 19:14:11 +01:00
75b99869d4 Rework README
- Add full language description
- Fix variable name inconsistency
2022-02-02 19:00:14 +01:00
de0bbb8171 Implement logical and / or 2022-02-02 18:56:45 +01:00
92f59cbf9a Update README 2022-02-02 16:48:26 +01:00
dd9ca660cc Move ast into separate file 2022-02-02 16:43:14 +01:00
7e2ef49481 Move token into separate file 2022-02-02 16:40:05 +01:00
86130984e2 Add example programs (project euler) 2022-02-02 16:26:37 +01:00
c4b146c325 Refactor interpreter to use borrowed Ast
- Should have been like this from the start
- About 9x performance increase
2022-02-02 16:24:42 +01:00
7b6fc89fb7 Implement if 2022-02-02 16:19:46 +01:00
8c9756b6d2 Implement print keyword 2022-02-02 14:05:58 +01:00
02993142df Update README 2022-01-31 23:49:22 +01:00
3348b7cf6d Implement loop keyword
- Loop is a combination of `while` and `for`
- `loop cond { }` acts exactly like `while`
- `loop cond; advance { }` acts like `for` without init
2022-01-31 16:58:46 +01:00
3098dc7e0a Implement simple CLI
- Implement running files
- Implement interactive mode
- Enable printing tokens & ast with flags
2022-01-31 16:24:25 +01:00
e0c00019ff Implement line comments 2022-01-29 23:29:09 +01:00
35fbae8ab9 Implement multi statement code
- Add statements
- Add mandatory semicolons after statements
2022-01-29 23:18:15 +01:00
23d336d63e Implement variables
- Assignment
- Declaration
- Identifier lexing
2022-01-29 22:49:15 +01:00
39351e1131 Slightly refactor lexer 2022-01-29 21:59:48 +01:00
b7872da3ea Move grammar def. to README 2022-01-29 21:54:05 +01:00
5cc89b855a Update grammar 2022-01-29 21:52:31 +01:00
32e4f1ea4f Implement relational binops 2022-01-29 21:48:55 +01:00
b664297c73 Implement comparison binops 2022-01-29 21:37:44 +01:00
ea60f17647 Implement bitwise not 2022-01-29 21:26:14 +01:00
5ffa0ea2ec Update README 2022-01-29 21:18:08 +01:00
2a59fe8c84 Implement unary negate 2022-01-29 21:12:01 +01:00
8f79440219 Update README 2022-01-29 20:52:30 +01:00
128b05b8a8 Implement parenthesis grouping 2022-01-29 20:51:55 +01:00
a9ee8eb66c Update grammar definition 2022-01-28 14:00:51 +01:00
5c7b6a7b41 Update README 2022-01-28 12:20:59 +01:00
a569781691 Implement more operators
- Mod
- Bitwise Or
- Bitwise And
- Bitwise Xor
- Shift Left
- Shift Right
2022-01-27 23:15:16 +01:00
0b75c30784 Implement div & sub 2022-01-27 22:29:06 +01:00
ed2ae144dd Number separator _ 2022-01-27 21:38:58 +01:00
7 changed files with 219 additions and 273 deletions

28
examples/euler5.nek Normal file
View File

@@ -0,0 +1,28 @@
// Project Euler, problem 5
//
// 2520 is the smallest number that can be divided by each of the numbers from 1 to 10 without any remainder.
// What is the smallest positive number that is evenly divisible by all of the numbers from 1 to 20?
//
// Correct Answer: 232_792_560
// Current candidate. It starts at 20 and only ever grows in steps of 20, so it is always a multiple of 20
num <- 20;
// Flag driving the outer loop: 1 means the search has to go on, 0 means the current candidate passed
should_continue <- 1;
// Divisor counter. The initial value is never used, it is reset to 20 before every inner loop
i <- 2;
loop should_continue {
// Assume the candidate passes until a divisor leaves a remainder
should_continue = 0;
i = 20;
// Try the divisors from 20 down to 2
loop i >= 2; i = i - 1 {
if num % i != 0 {
should_continue = 1;
// break
// Emulated by forcing the counter below the loop bound, so `i >= 2` fails on the next check
i = 0;
}
}
// Candidate was rejected: move on to the next multiple of 20
if should_continue == 1 {
num = num + 20;
}
}
print num;

View File

@@ -82,8 +82,6 @@ pub enum Expression {
I64(i64),
/// String literal
String(Rc<String>),
FunCall(String, Vec<Expression>),
/// Variable
Var(String),
/// Binary operation. Consists of type, left hand side and right hand side
@@ -118,11 +116,35 @@ pub enum Statement {
Loop(Loop),
If(If),
Print(Expression),
FunDecl(String, Vec<String>, Ast),
Return(Expression),
}
/// Abstract syntax tree of a program or of a nested block (loop / if bodies are stored as `Ast`
/// values as well).
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct Ast {
/// The statements of the program / block, in the order the parser pushed them
pub prog: Vec<Statement>,
}
impl BinOpType {
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
///
/// The operator precedences are derived from the C language operator precedences. While not all
/// C operators are included or the exact same, the precedence oder is the same.
/// See: https://en.cppreference.com/w/c/language/operator_precedence
pub fn precedence(&self) -> u8 {
match self {
BinOpType::Declare => 0,
BinOpType::Assign => 1,
BinOpType::LOr => 2,
BinOpType::LAnd => 3,
BinOpType::BOr => 4,
BinOpType::BXor => 5,
BinOpType::BAnd => 6,
BinOpType::EquEqu | BinOpType::NotEqu => 7,
BinOpType::Less | BinOpType::LessEqu | BinOpType::Greater | BinOpType::GreaterEqu => 8,
BinOpType::Shl | BinOpType::Shr => 9,
BinOpType::Add | BinOpType::Sub => 10,
BinOpType::Mul | BinOpType::Div | BinOpType::Mod => 11,
}
}
}

View File

@@ -1,6 +1,10 @@
use std::{collections::HashMap, fmt::Display, rc::Rc, cell::RefCell};
use std::{fmt::Display, rc::Rc};
use crate::{ast::{Expression, BinOpType, UnOpType, Ast, Statement, If}, parser::parse, lexer::lex};
use crate::{
ast::{Ast, BinOpType, Expression, If, Statement, UnOpType},
lexer::lex,
parser::parse,
};
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Value {
@@ -8,25 +12,34 @@ pub enum Value {
String(Rc<String>),
}
/// Describes how the execution of a block of statements ended
pub enum RunEnd {
/// A return statement was executed. Carries the value its expression evaluated to
Return(Value),
/// Every statement of the block was executed without running into a return statement
End,
}
pub struct Interpreter {
// Variable table stores the runtime values of variables
vartable: HashMap<String, Value>,
funtable: HashMap<String, RefCell<(Vec<String>, Ast)>>,
vartable: Vec<(String, Value)>,
}
impl Interpreter {
pub fn new() -> Self {
Self {
vartable: HashMap::new(),
funtable: HashMap::new(),
vartable: Vec::new(),
}
}
/// Look up a variable by name and hand back a copy of its value.
///
/// The variable table is walked back to front, so of several entries with the same name the one
/// that was pushed last wins. Returns `None` when no entry carries that name.
fn get_var(&self, name: &str) -> Option<Value> {
    for (var_name, value) in self.vartable.iter().rev() {
        if var_name.as_str() == name {
            return Some(value.clone());
        }
    }

    None
}
/// Look up a variable by name and hand back a mutable reference to its value.
///
/// The variable table is walked back to front, so of several entries with the same name the one
/// that was pushed last wins. Returns `None` when no entry carries that name.
fn get_var_mut(&mut self, name: &str) -> Option<&mut Value> {
    for (var_name, value) in self.vartable.iter_mut().rev() {
        if var_name.as_str() == name {
            return Some(value);
        }
    }

    None
}
pub fn run_str(&mut self, code: &str, print_tokens: bool, print_ast: bool) {
let tokens = lex(code).unwrap();
if print_tokens {
@@ -41,17 +54,14 @@ impl Interpreter {
self.run(&ast);
}
pub fn run(&mut self, prog: &Ast) -> RunEnd {
pub fn run(&mut self, prog: &Ast) {
let vartable_len = self.vartable.len();
for stmt in &prog.prog {
match stmt {
Statement::Expr(expr) => {
self.resolve_expr(expr);
}
Statement::Return(expr) => {
return RunEnd::Return(self.resolve_expr(expr));
}
Statement::Loop(looop) => {
// loop runs as long as condition != 0
loop {
@@ -59,10 +69,7 @@ impl Interpreter {
break;
}
match self.run(&looop.body) {
RunEnd::Return(val) => return RunEnd::Return(val),
RunEnd::End => (),
}
self.run(&looop.body);
if let Some(adv) = &looop.advancement {
self.resolve_expr(&adv);
@@ -75,24 +82,21 @@ impl Interpreter {
print!("{}", result);
}
Statement::If(If {condition, body_true, body_false}) => {
let end = if matches!(self.resolve_expr(condition), Value::I64(0)) {
self.run(body_false)
Statement::If(If {
condition,
body_true,
body_false,
}) => {
if matches!(self.resolve_expr(condition), Value::I64(0)) {
self.run(body_false);
} else {
self.run(body_true)
};
match end {
RunEnd::Return(val) => return RunEnd::Return(val),
RunEnd::End => (),
self.run(body_true);
}
}
Statement::FunDecl(name, args, body) => {
self.funtable.insert(name.clone(), (args.clone(), body.clone()).into());
}
}
}
RunEnd::End
self.vartable.truncate(vartable_len);
}
fn resolve_expr(&mut self, expr: &Expression) -> Value {
@@ -102,28 +106,11 @@ impl Interpreter {
Expression::BinOp(bo, lhs, rhs) => self.resolve_binop(bo, lhs, rhs),
Expression::UnOp(uo, operand) => self.resolve_unop(uo, operand),
Expression::Var(name) => self.resolve_var(name),
Expression::FunCall(name, args) => {
let fun = self.funtable.get(name).expect("Function not declared").clone();
for i in 0 .. args.len() {
let val = self.resolve_expr(&args[i]);
self.vartable.insert(fun.borrow().0[i].clone(), val);
}
if fun.borrow().0.len() != args.len() {
panic!("Invalid number of arguments for function");
}
let end = self.run(&fun.borrow().1);
match end {
RunEnd::Return(val) => val,
RunEnd::End => Value::I64(0),
}
}
}
}
fn resolve_var(&mut self, name: &str) -> Value {
match self.vartable.get(name) {
match self.get_var(name) {
Some(val) => val.clone(),
None => panic!("Variable '{}' used but not declared", name),
}
@@ -145,17 +132,17 @@ impl Interpreter {
match (&bo, &lhs) {
(BinOpType::Declare, Expression::Var(name)) => {
self.vartable.insert(name.clone(), rhs.clone());
self.vartable.push((name.clone(), rhs.clone()));
return rhs;
}
(BinOpType::Assign, Expression::Var(name)) => {
match self.vartable.get_mut(name) {
match self.get_var_mut(name) {
Some(val) => *val = rhs.clone(),
None => panic!("Runtime Error: Trying to assign value to undeclared variable"),
}
return rhs;
}
_ => ()
_ => (),
}
let lhs = self.resolve_expr(lhs);
@@ -197,11 +184,10 @@ impl Display for Value {
}
}
#[cfg(test)]
mod test {
use super::{Interpreter, Value};
use crate::ast::{Expression, BinOpType};
use crate::ast::{BinOpType, Expression};
#[test]
fn test_interpreter_expr() {
@@ -212,7 +198,12 @@ mod test {
Expression::BinOp(
BinOpType::Add,
Expression::I64(1).into(),
Expression::BinOp(BinOpType::Mul, Expression::I64(2).into(), Expression::I64(3).into()).into(),
Expression::BinOp(
BinOpType::Mul,
Expression::I64(2).into(),
Expression::I64(3).into(),
)
.into(),
)
.into(),
Expression::I64(4).into(),

View File

@@ -106,12 +106,30 @@ impl<'a> Lexer<'a> {
'{' => tokens.push(Token::LBraces),
'}' => tokens.push(Token::RBraces),
'!' => tokens.push(Token::LNot),
',' => tokens.push(Token::Comma),
// Lex numbers
ch @ '0'..='9' => {
// Special tokens with variable length
// Lex multiple characters together as numbers
ch @ '0'..='9' => tokens.push(self.lex_number(ch)?),
// Lex multiple characters together as a string
'"' => tokens.push(self.lex_str()?),
// Lex multiple characters together as identifier
ch @ ('a'..='z' | 'A'..='Z' | '_') => tokens.push(self.lex_identifier(ch)?),
ch => Err(LexErr::UnexpectedChar(ch))?,
}
}
Ok(tokens)
}
/// Lex multiple characters as a number until encountering a non numeric digit. This includes
/// the first character
fn lex_number(&mut self, first_char: char) -> Result<Token, LexErr> {
// String representation of the integer value
let mut sval = String::from(ch);
let mut sval = String::from(first_char);
// Do as long as a next char exists and it is a numeric char
loop {
@@ -131,11 +149,11 @@ impl<'a> Lexer<'a> {
// Try to convert the string representation of the value to i64
let i64val = sval.parse().map_err(|_| LexErr::NumericParse(sval))?;
tokens.push(Token::I64(i64val));
Ok(Token::I64(i64val))
}
// Lex a string
'"' => {
/// Lex characters as a string until encountering an unescaped closing double quote char '"'
fn lex_str(&mut self) -> Result<Token, LexErr> {
// Opening " was consumed in match
let mut text = String::new();
@@ -165,12 +183,12 @@ impl<'a> Lexer<'a> {
// Consume closing "
self.next();
tokens.push(Token::String(text))
Ok(Token::String(text))
}
// Lex characters as identifier
ch @ ('a'..='z' | 'A'..='Z' | '_') => {
let mut ident = String::from(ch);
/// Lex characters from the text as an identifier. This includes the first character passed in
fn lex_identifier(&mut self, first_char: char) -> Result<Token, LexErr> {
let mut ident = String::from(first_char);
// Do as long as a next char exists and it is a valid char for an identifier
loop {
@@ -190,21 +208,12 @@ impl<'a> Lexer<'a> {
"print" => Token::Print,
"if" => Token::If,
"else" => Token::Else,
"fun" => Token::Fun,
"return" => Token::Return,
// If it doesn't match a keyword, it is a normal identifier
_ => Token::Ident(ident),
};
tokens.push(token);
}
ch => Err(LexErr::UnexpectedChar(ch))?,
}
}
Ok(tokens)
Ok(token)
}
/// Advance to next character and return the removed char

View File

@@ -1,8 +1,11 @@
use std::{env::args, fs, io::{stdout, Write, stdin}};
use std::{
env::args,
fs,
io::{stdin, stdout, Write},
};
use nek_lang::interpreter::Interpreter;
#[derive(Debug, Default)]
struct CliConfig {
print_tokens: bool,
@@ -12,7 +15,6 @@ struct CliConfig {
}
fn main() {
let mut conf = CliConfig::default();
// Go through all commandline arguments except the first (filename)
@@ -49,7 +51,5 @@ fn main() {
interpreter.run_str(&code, conf.print_tokens, conf.print_ast);
}
}
}

View File

@@ -3,6 +3,12 @@ use std::iter::Peekable;
use crate::ast::*;
use crate::token::Token;
/// Parse the given tokens into an abstract syntax tree
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
    // The parser is only needed for this one call, so it is never bound to a name
    Parser::new(tokens).parse()
}
/// Parser state. Wraps the token stream so that the upcoming token can be looked at before it is
/// consumed.
struct Parser<T: Iterator<Item = Token>> {
/// Source of the tokens, peekable to allow a lookahead of one token
tokens: Peekable<T>,
}
@@ -14,6 +20,8 @@ impl<T: Iterator<Item = Token>> Parser<T> {
Self { tokens }
}
/// Parse tokens into an abstract syntax tree. This will continuously parse statements until
/// encountering end-of-file or a block end '}' .
fn parse(&mut self) -> Ast {
let mut prog = Vec::new();
@@ -22,10 +30,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
Token::Semicolon => {
self.next();
}
Token::EoF => break,
Token::RBraces => {
break;
}
Token::EoF | Token::RBraces => break,
// By default try to parse a statement
_ => prog.push(self.parse_stmt()),
@@ -35,6 +40,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
Ast { prog }
}
/// Parse a single statement from the tokens.
fn parse_stmt(&mut self) -> Statement {
match self.peek() {
Token::Loop => Statement::Loop(self.parse_loop()),
@@ -52,64 +58,8 @@ impl<T: Iterator<Item = Token>> Parser<T> {
Statement::Print(expr)
}
Token::Return => {
self.next();
let expr = self.parse_expr();
// After a statement, there must be a semicolon
if !matches!(self.next(), Token::Semicolon) {
panic!("Expected semicolon after statement");
}
Statement::Return(expr)
}
Token::If => Statement::If(self.parse_if()),
Token::Fun => {
self.next();
let name = match self.next() {
Token::Ident(name) => name,
_ => panic!("Error lexing function: Expected ident token"),
};
let mut args = Vec::new();
if !matches!(self.next(), Token::LParen) {
panic!("Expected opening parenthesis");
}
while self.peek() != &Token::RParen {
let argname = match self.next() {
Token::Ident(argname) => argname,
_ => panic!("Error lexing function: Expected ident token for argname"),
};
args.push(argname);
if self.peek() == &Token::Comma {
self.next();
}
}
self.next();
if !matches!(self.next(), Token::LBraces) {
panic!("Expected opening braces");
}
let body = self.parse();
if !matches!(self.next(), Token::RBraces) {
panic!("Expected closing braces");
}
Statement::FunDecl(name, args, body)
}
// If it is not a loop, try to lex as an expression
_ => {
let stmt = Statement::Expr(self.parse_expr());
@@ -124,6 +74,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
}
}
/// Parse an if statement from the tokens
fn parse_if(&mut self) -> If {
if !matches!(self.next(), Token::If) {
panic!("Error lexing if: Expected if token");
@@ -164,6 +115,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
}
}
/// Parse a loop statement from the tokens
fn parse_loop(&mut self) -> Loop {
if !matches!(self.next(), Token::Loop) {
panic!("Error lexing loop: Expected loop token");
@@ -203,6 +155,7 @@ impl<T: Iterator<Item = Token>> Parser<T> {
}
}
/// Parse a single expression from the tokens
fn parse_expr(&mut self) -> Expression {
let lhs = self.parse_primary();
self.parse_expr_precedence(lhs, 0)
@@ -245,8 +198,6 @@ impl<T: Iterator<Item = Token>> Parser<T> {
// Literal String
Token::String(text) => Expression::String(text.into()),
Token::Ident(name) if matches!(self.peek(), Token::LParen) => self.parse_funcall(name),
Token::Ident(name) => Expression::Var(name),
// Parentheses grouping
@@ -283,24 +234,6 @@ impl<T: Iterator<Item = Token>> Parser<T> {
}
}
/// Parse the argument list of a call to the function `name` and build the call expression.
///
/// The function name has already been consumed by the caller, the next token is the opening
/// parenthesis. Arguments are parsed as expressions until a closing parenthesis is the next
/// token; a comma following an argument is consumed if present.
fn parse_funcall(&mut self, name: String) -> Expression {
    // Discard the opening parenthesis
    self.next();

    let mut args = Vec::new();
    loop {
        if matches!(self.peek(), Token::RParen) {
            break;
        }

        args.push(self.parse_expr());

        // Skip the separator between two arguments
        if matches!(self.peek(), Token::Comma) {
            self.next();
        }
    }

    // Discard the closing parenthesis
    self.next();

    Expression::FunCall(name, args)
}
/// Get the next Token without removing it
fn peek(&mut self) -> &Token {
self.tokens.peek().unwrap_or(&Token::EoF)
@@ -312,37 +245,6 @@ impl<T: Iterator<Item = Token>> Parser<T> {
}
}
/// Parse the given tokens into an abstract syntax tree
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
    // The parser is only needed for this one call, so it is never bound to a name
    Parser::new(tokens).parse()
}
impl BinOpType {
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
///
/// The operator precedences are derived from the C language operator precedences. While not all
/// C operators are included or the exact same, the precedence oder is the same.
/// See: https://en.cppreference.com/w/c/language/operator_precedence
fn precedence(&self) -> u8 {
match self {
BinOpType::Declare => 0,
BinOpType::Assign => 1,
BinOpType::LOr => 2,
BinOpType::LAnd => 3,
BinOpType::BOr => 4,
BinOpType::BXor => 5,
BinOpType::BAnd => 6,
BinOpType::EquEqu | BinOpType::NotEqu => 7,
BinOpType::Less | BinOpType::LessEqu | BinOpType::Greater | BinOpType::GreaterEqu => 8,
BinOpType::Shl | BinOpType::Shr => 9,
BinOpType::Add | BinOpType::Sub => 10,
BinOpType::Mul | BinOpType::Div | BinOpType::Mod => 11,
}
}
}
#[cfg(test)]
mod tests {
use super::{parse, BinOpType, Expression};

View File

@@ -23,12 +23,6 @@ pub enum Token {
/// Else keyword (else)
Else,
Fun,
Comma,
Return,
/// Left Parenthesis ('(')
LParen,