79 Commits

Author SHA1 Message Date
1ade6cae50 Mention vsc extension 2022-02-11 13:31:48 +01:00
3e4ed82dc4 Update README 2022-02-11 13:04:45 +01:00
e5edc6b2ba Fix UB non top-level functions 2022-02-11 13:00:41 +01:00
f4286db21d Remove anyhow dependency 2022-02-11 12:36:36 +01:00
3892ea46e0 Update examples 2022-02-11 01:19:45 +01:00
8b7ed96e15 Update nice_panic macro 2022-02-11 01:19:34 +01:00
67b07dfd72 Fix typo 2022-02-11 01:01:31 +01:00
6c0867143b Add toc to README 2022-02-11 00:12:36 +01:00
abefe32300 Update README 2022-02-10 23:02:40 +01:00
742d6706b0 Array values are now pass-by-reference 2022-02-10 21:27:05 +01:00
3806a61756 Allow endless loops with no condition 2022-02-10 20:36:26 +01:00
2880ba81ab Implement break & continue
- Fix return propagation inside loops
2022-02-10 13:13:15 +01:00
4e92a416ed Improve CLI
- Remove unused flags
- Show more helpful error messages
2022-02-10 12:58:09 +01:00
c1bee69fa6 Simplify general program tests 2022-02-10 12:24:20 +01:00
f2331d7de9 Add general test for functions as example 2022-02-10 12:19:01 +01:00
c4d2f89d35 Fix function args 2022-02-10 12:13:30 +01:00
ab059ce18c Add recursive fibonacci as test 2022-02-10 01:32:07 +01:00
aeedfb4ef2 Implement functions
- Implement function declaration and call
- Change the precalculated variable stack positions to contain the
  offset from the end instead of the absolute position. This is
  important for passing fun args on the stack
- Add the ability to offset the stackframes. This is used to delete the
  stack where the fun args have been stored before the block executes
- Implement exit type for blocks in interpreter. This is used to get the
  return values and propagate them where needed
- Add recursive fibonacci examples
2022-02-10 01:26:11 +01:00
f0c2bd8dde Remove panics from interpreter, worse performance
- Replaced the interpreters panics with actual errors and results
- Added a few extra checks for arrays and div-by-zero
- These changes significantly reduced runtime performance, even without
  the extra checks
2022-02-09 18:18:21 +01:00
421fbbc873 Update euler5 example 2022-02-09 17:12:47 +01:00
383da4ae05 Rewrite declaration as statement instead of binop
- Declarations are now separate statements
- Generate unknown var errors when vars are not declared
- Replace Peekable by new custom PutBackIter type that allows for
  unlimited putback and therefore look-ahead
2022-02-09 16:54:06 +01:00
7ea5f67f9c Cleaner unop parsing 2022-02-09 14:23:24 +01:00
235eb460dc Replace panics with errors in parser 2022-02-09 13:49:14 +01:00
2312deec5b Small refactoring for parser 2022-02-09 01:13:22 +01:00
948d41fb45 Update lexer tests 2022-02-09 00:20:56 +01:00
fdef796440 Update token macros 2022-02-08 23:26:23 +01:00
926bdeb2dc Refactor lexer match loop 2022-02-08 22:54:41 +01:00
726dd62794 Big token refactoring
- Extract keywords, literals and combo tokens into separate sub-enums
- Add a macro for quickly generating all tokens including the sub-enum
  tokens. This also takes less chars to write
2022-02-08 18:56:17 +01:00
c723b1c2cb Rename var in parser 2022-02-06 15:31:41 +01:00
e7b67d85a9 Add game of life example 2022-02-05 11:53:01 +01:00
cf2e5348bb Implement arrays 2022-02-04 18:48:45 +01:00
8b67c4d59c Implement block scopes (code inside braces)
- Putting code in between braces will create a new scope
2022-02-04 17:30:23 +01:00
cbf31fa513 Implement simple AST optimizer
- Precalculate operations only containing literals
2022-02-04 17:06:38 +01:00
56665af233 Update examples 2022-02-04 14:25:25 +01:00
22634af554 Precalculate stack positions for variables
- Parser calculates positions for the variables
- This removes the lookup time during runtime
- Consistent high performance
2022-02-04 14:25:25 +01:00
d4c6f3d5dc Implement string interning 2022-02-04 14:25:23 +01:00
4dbc3adfd5 Refactor Ast to ScopedBlock 2022-02-04 14:24:03 +01:00
cbea567d65 Implement vec based scopes
- Replaced vartable hashmap with vec
- Use linear search in reverse to find the variables by name
- This is really fast with a small number of variables but tanks fast
  with more vars due to O(n) lookup times
- Implemented scopes by dropping all elements from the vartable at the
  end of a scope
2022-02-04 14:24:00 +01:00
e4977da546 Use euler examples as tests 2022-02-04 12:45:34 +01:00
588b3b5b2c Autoformat 2022-02-03 17:38:25 +01:00
f6152670aa Small refactor for lexer 2022-02-03 17:25:55 +01:00
c2b9ee71b8 Add project euler example 5 2022-02-03 16:16:38 +01:00
f8e5bd7423 Add comments to parser 2022-02-03 16:01:33 +01:00
d7001a5c52 Refactor, Comments, Bugfix for lexer
- Small refactoring in the lexer
- Added some more comments to the lexer
- Fixed endless loop when encountering comment in last line
2022-02-03 00:44:48 +01:00
bc68d9fa49 Add Result + Err to lexer 2022-02-02 21:59:46 +01:00
264d8f92f4 Update README 2022-02-02 19:40:10 +01:00
d8f5b876ac Implement String Literals
- String literals can be stored in variables, but are fully immutable
  and are not compatible with any operators
2022-02-02 19:38:28 +01:00
8cf6177cbc Update README 2022-02-02 19:15:20 +01:00
39bd4400b4 Implement logical not 2022-02-02 19:14:11 +01:00
75b99869d4 Rework README
- Add full language description
- Fix variable name inconsistency
2022-02-02 19:00:14 +01:00
de0bbb8171 Implement logical and / or 2022-02-02 18:56:45 +01:00
92f59cbf9a Update README 2022-02-02 16:48:26 +01:00
dd9ca660cc Move ast into separate file 2022-02-02 16:43:14 +01:00
7e2ef49481 Move token into separate file 2022-02-02 16:40:05 +01:00
86130984e2 Add example programs (project euler) 2022-02-02 16:26:37 +01:00
c4b146c325 Refactor interpreter to use borrowed Ast
- Should have been like this from the start
- About 9x performance increase
2022-02-02 16:24:42 +01:00
7b6fc89fb7 Implement if 2022-02-02 16:19:46 +01:00
8c9756b6d2 Implement print keyword 2022-02-02 14:05:58 +01:00
02993142df Update README 2022-01-31 23:49:22 +01:00
3348b7cf6d Implement loop keyword
- Loop is a combination of `while` and `for`
- `loop cond { }` acts exactly like `while`
- `loop cond; advance { }` acts like `for` without init
2022-01-31 16:58:46 +01:00
3098dc7e0a Implement simple CLI
- Implement running files
- Implement interactive mode
- Enable printing tokens & ast with flags
2022-01-31 16:24:25 +01:00
e0c00019ff Implement line comments 2022-01-29 23:29:09 +01:00
35fbae8ab9 Implement multi statement code
- Add statements
- Add mandatory semicolons after statements
2022-01-29 23:18:15 +01:00
23d336d63e Implement variables
- Assignment
- Declaration
- Identifier lexing
2022-01-29 22:49:15 +01:00
39351e1131 Slightly refactor lexer 2022-01-29 21:59:48 +01:00
b7872da3ea Move grammar def. to README 2022-01-29 21:54:05 +01:00
5cc89b855a Update grammar 2022-01-29 21:52:31 +01:00
32e4f1ea4f Implement relational binops 2022-01-29 21:48:55 +01:00
b664297c73 Implement comparison binops 2022-01-29 21:37:44 +01:00
ea60f17647 Implement bitwise not 2022-01-29 21:26:14 +01:00
5ffa0ea2ec Update README 2022-01-29 21:18:08 +01:00
2a59fe8c84 Implement unary negate 2022-01-29 21:12:01 +01:00
8f79440219 Update README 2022-01-29 20:52:30 +01:00
128b05b8a8 Implement parenthesis grouping 2022-01-29 20:51:55 +01:00
a9ee8eb66c Update grammar definition 2022-01-28 14:00:51 +01:00
5c7b6a7b41 Update README 2022-01-28 12:20:59 +01:00
a569781691 Implement more operators
- Mod
- Bitwise Or
- Bitwise And
- Bitwise Xor
- Shift Left
- Shift Right
2022-01-27 23:15:16 +01:00
0b75c30784 Implement div & sub 2022-01-27 22:29:06 +01:00
ed2ae144dd Number separator _ 2022-01-27 21:38:58 +01:00
23 changed files with 2248 additions and 734 deletions

View File

@@ -4,5 +4,4 @@ version = "0.1.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
anyhow = "1.0.53"
thiserror = "1.0.30" thiserror = "1.0.30"

253
README.md
View File

@@ -1,9 +1,45 @@
# NEK-Lang # NEK-Lang
## Table of contents
- [NEK-Lang](#nek-lang)
- [Table of contents](#table-of-contents)
- [Variables](#variables)
- [Declaration](#declaration)
- [Assignment](#assignment)
- [Datatypes](#datatypes)
- [I64](#i64)
- [String](#string)
- [Array](#array)
- [Expressions](#expressions)
- [General](#general)
- [Mathematical Operators](#mathematical-operators)
- [Bitwise Operators](#bitwise-operators)
- [Logical Operators](#logical-operators)
- [Equality & Relational Operators](#equality--relational-operators)
- [Control-Flow](#control-flow)
- [Loop](#loop)
- [If / Else](#if--else)
- [Block Scopes](#block-scopes)
- [Functions](#functions)
- [Function definition](#function-definition)
- [Function calls](#function-calls)
- [IO](#io)
- [Print](#print)
- [Comments](#comments)
- [Line comments](#line-comments)
- [Feature Tracker](#feature-tracker)
- [High level Components](#high-level-components)
- [Language features](#language-features)
- [Parsing Grammar](#parsing-grammar)
- [Expressions](#expressions-1)
- [Statements](#statements)
- [Examples](#examples)
- [Extras](#extras)
- [Visual Studio Code Language Support](#visual-studio-code-language-support)
## Variables ## Variables
Currently all variables are global and completely unscoped. That means no matter where a variable is declared, it remains over the whole remaining runtime of the progam. The variables are all contained in scopes. Variables defined in an outer scope can be accessed in
inner scopes. All variables defined in a scope that has ended no longer exist and can't be
All variables are currently of type `i64` (64-bit signed integer) accessed.
### Declaration ### Declaration
- Declare and initialize a new variable - Declare and initialize a new variable
@@ -25,6 +61,61 @@ a = 123;
``` ```
The value `123` is assigned to the variable named `a`. `a` needs to be declared before this. The value `123` is assigned to the variable named `a`. `a` needs to be declared before this.
## Datatypes
The available variable datatypes are `i64` (64-bit signed integer), `string` (`"this is a string"`) and `array` (`[10]`)
### I64
- The normal default datatype is `i64` which is a 64-bit signed integer
- Can be created by just writing an integer literal like `546`
- Inside the number literal `_` can be inserted for visual separation `100_000`
- The i64 values can be used as expected in calculations, conditions and so on
-
```
my_i64 <- 123_456;
```
### String
- Strings mainly exist for formatting the text output of a program
- Strings can be created by using doublequotes like in other languages `"Hello world"`
- There is no way to access or change the characters of the string
- Unicode characters are supported `"Hello 🌎"`
- Escape characters `\n`, `\r`, `\t`, `\"`, `\\` are supported
- String can still be assigned to variables, just like i64
```
world <- "🌎";
print "Hello ";
print world;
print "\n";
```
### Array
- Arrays can contain any other datatypes and don't need to have the same type in all cells
- Arrays can be created by using brackets with the size in between `[size]`
- Arrays must be assigned to a variable to be used
- All cells will be initialized with i64 0 values
- The size can be any expression that results in a positive i64 value
- The array size can't be changed after creation
- The arrays data is always allocated on the heap
- The array cells can be accessed by using the variable name and brackets `my_arr[index]`
- The index can be any expression that results in a non-negative i64 value within the range of the array's
indices
- The indices start with 0
- When an array is passed to a function, it is passed by reference
```
width <- 5;
height <- 5;
// Initialize array of size 25 with 25x 0
my_array <- [width * height];
// Modify first value
my_array[0] = 5;
// Print first value
print my_array[0];
```
## Expressions ## Expressions
The operator precedence is the same order as in `C` for all implemented operators. The operator precedence is the same order as in `C` for all implemented operators.
Refer to the Refer to the
@@ -72,24 +163,38 @@ The equality and relational operations result in `1` if the condition is evaluat
For conditions like in if or loops, every non zero value is equal to `true`, and `0` is `false`. For conditions like in if or loops, every non zero value is equal to `true`, and `0` is `false`.
### Loop ### Loop
- There is currently only the `loop` keyword that can act like a `while` with optional advancement (an expression that is executed after the loop body) - The `loop` keyword can be used as an infinite loop, as a while loop or as a while loop with advancement (an expression that is executed after the loop body)
- If only `loop` is used, directly followed by the body, it is an infinite loop that needs to be
terminated by using the `break` keyword
- The `loop` keyword is followed by the condition (an expression) without needing parentheses - The `loop` keyword is followed by the condition (an expression) without needing parentheses
- *Optional:* If there is a `;` after the condition, there must be another expression which is used as the advancement - *Optional:* If there is a `;` after the condition, there must be another expression which is used as the advancement
- The loops body is wrapped in braces (`{ }`) just like in C/C++ - The loops body is wrapped in braces (`{ }`) just like in C/C++
- The `continue` keyword can be used to end the current loop iteration early
- The `break` keyword can be used to fully break out of the current loop
``` ```
// Print the numbers from 0 to 9 // Print the numbers from 0 to 9
// With endless loop
i <- 0;
loop {
if i >= 10 {
break;
}
print i;
i = i + 1;
}
// Without advancement // Without advancement
i <- 0; i <- 0;
loop i < 10 { loop i < 10 {
print i; print i;
i = i - 1; i = i + 1;
} }
// With advancement // With advancement
k <- 0; k <- 0;
loop k < 10; k = k - 1 { loop k < 10; k = k + 1 {
print k; print k;
} }
``` ```
@@ -112,6 +217,74 @@ if a == b {
} }
``` ```
### Block Scopes
- It is possible to create a limited scope for local variables that will no longer exist once the
scope ends
- Shadowing variables by redefining a variable in an inner scope is supported
```
var_in_outer_scope <- 5;
{
var_in_inner_scope <- 3;
// Inner scope can access both vars
print var_in_outer_scope;
print var_in_inner_scope;
}
// Outer scope is still valid
print var_in_outer_scope;
// !!! THIS DOES NOT WORK !!!
// The inner scope has ended
print var_in_inner_scope;
```
## Functions
### Function definition
- Functions can be defined by using the `fun` keyword, followed by the function name and the
parameters in parentheses. After the parentheses, the body is specified inside a braces block
- The function parameters are specified by only the names
- The function body has its own scope
- Parameters are only accessible inside the body
- Variables from the outer scope can be accessed and modified if they are defined before the function
- Variables from the outer scope are shadowed by parameters with the same name
- The `return` keyword can be used to return a value from the function and exit it immediately
- If no return is specified, a `void` value is returned
- Functions can only be defined at the top-level. So defining a function inside of any other scoped
block (like inside another function, if, loop, ...) is invalid
- Functions can only be used after definition and there is no forward declaration right now
- However a function can be called recursively inside of itself
- Functions can't be redefined, so defining a function with an existing name is invalid
```
fun add_maybe(a, b) {
if a < 100 {
return a;
} else {
return a + b;
}
}
fun println(val) {
print val;
print "\n";
}
```
### Function calls
- Function calls are primary expressions, so they can be directly used in calculations (if they
return appropriate values)
- Function calls are performed by writing the function name, followed by the arguments in parentheses
- The arguments can be any expressions, separated by commas
```
b <- 100;
result <- add_maybe(250, b);
// Prints 350 + new-line
println(result);
```
## IO ## IO
### Print ### Print
@@ -120,7 +293,8 @@ Printing is implemented via the `print` keyword
- Print currently automatically adds a linebreak - Print currently automatically adds a linebreak
``` ```
a <- 1; a <- 1;
print a; // Outputs `"1\n"` to the terminal // Outputs `"1"` to the terminal
print a;
``` ```
## Comments ## Comments
@@ -140,6 +314,8 @@ Line comments can be initiated by using `//`
- [x] Lexer: Transforms text into Tokens - [x] Lexer: Transforms text into Tokens
- [x] Parser: Transforms Tokens into Abstract Syntax Tree - [x] Parser: Transforms Tokens into Abstract Syntax Tree
- [x] Interpreter (tree-walk-interpreter): Walks the tree and evaluates the expressions / statements - [x] Interpreter (tree-walk-interpreter): Walks the tree and evaluates the expressions / statements
- [x] Simple optimizer: Apply trivial optimizations to the Ast
- [x] Precalculate binary ops / unary ops that have only literal operands
## Language features ## Language features
@@ -149,7 +325,7 @@ Line comments can be initiated by using `//`
- [x] Subtraction `a - b` - [x] Subtraction `a - b`
- [x] Multiplication `a * b` - [x] Multiplication `a * b`
- [x] Division `a / b` - [x] Division `a / b`
- [x] Modulo `a % b - [x] Modulo `a % b`
- [x] Negate `-a` - [x] Negate `-a`
- [x] Parentheses `(a + b) * c` - [x] Parentheses `(a + b) * c`
- [x] Logical boolean operators - [x] Logical boolean operators
@@ -173,23 +349,43 @@ Line comments can be initiated by using `//`
- [x] Variables - [x] Variables
- [x] Declaration - [x] Declaration
- [x] Assignment - [x] Assignment
- [x] Local variables (for example inside loop, if, else, functions)
- [x] Scoped block for specific local vars `{ ... }`
- [x] Statements with semicolon & Multiline programs - [x] Statements with semicolon & Multiline programs
- [x] Control flow - [x] Control flow
- [x] While loop `while X { ... }` - [x] Loops
- [x] While-style loop `loop X { ... }`
- [x] For-style loop without init, with `X` as condition and `Y` as advancement `loop X; Y { ... }`
- [x] Infinite loop `loop { ... }`
- [x] Break `break`
- [x] Continue `continue`
- [x] If else statement `if X { ... } else { ... }` - [x] If else statement `if X { ... } else { ... }`
- [x] If Statement - [x] If Statement
- [x] Else statement - [x] Else statement
- [x] Line comments `//` - [x] Line comments `//`
- [x] Strings - [x] Strings
- [x] Arrays
- [x] Creating array with size `X` as a variable `arr <- [X]`
- [x] Accessing arrays by index `arr[X]`
- [x] IO Intrinsics - [x] IO Intrinsics
- [x] Print - [x] Print
- [x] Functions
- [x] Function declaration `fun f(X, Y, Z) { ... }`
- [x] Function calls `f(1, 2, 3)`
- [x] Function returns `return X`
- [x] Local variables
- [x] Pass arrays by-reference, i64 by-value, string is a const ref
## Grammar # Parsing Grammar
### Expressions ## Expressions
``` ```
LITERAL = I64_LITERAL | STR_LITERAL ARRAY_LITERAL = "[" expr "]"
expr_primary = LITERAL | IDENT | "(" expr ")" | "-" expr_primary | "~" expr_primary ARRAY_ACCESS = IDENT "[" expr "]"
FUN_CALL = IDENT "(" (expr ",")* expr? ")"
LITERAL = I64_LITERAL | STR_LITERAL | ARRAY_LITERAL
expr_primary = LITERAL | IDENT | FUN_CALL | ARRAY_ACCESS | "(" expr ")" | "-" expr_primary
| "~" expr_primary
expr_mul = expr_primary (("*" | "/" | "%") expr_primary)* expr_mul = expr_primary (("*" | "/" | "%") expr_primary)*
expr_add = expr_mul (("+" | "-") expr_mul)* expr_add = expr_mul (("+" | "-") expr_mul)*
expr_shift = expr_add ((">>" | "<<") expr_add)* expr_shift = expr_add ((">>" | "<<") expr_add)*
@@ -203,10 +399,33 @@ expr_lor = expr_land ("||" expr_land)*
expr = expr_lor expr = expr_lor
``` ```
### Statements ## Statements
``` ```
stmt_if = "if" expr "{" stmt* "}" ("else" "{" stmt* "}")? stmt_return = "return" expr ";"
stmt_loop = "loop" expr (";" expr)? "{" stmt* "}" stmt_break = "break" ";"
stmt_continue = "continue" ";"
stmt_var_decl = IDENT "<-" expr ";"
stmt_fun_decl = "fun" IDENT "(" (IDENT ",")* IDENT? ")" "{" stmt* "}"
stmt_expr = expr ";" stmt_expr = expr ";"
stmt = stmt_expr | stmt_loop stmt_block = "{" stmt* "}"
stmt_loop = "loop" (expr (";" expr)?)? "{" stmt* "}"
stmt_if = "if" expr "{" stmt* "}" ("else" "{" stmt* "}")?
stmt_print = "print" expr ";"
stmt = stmt_return | stmt_break | stmt_continue | stmt_var_decl | stmt_fun_decl
| stmt_expr | stmt_block | stmt_loop | stmt_if | stmt_print
``` ```
# Examples
There are a bunch of examples in the [examples](examples/) directory. Those include (non-optimal) solutions to the first five project euler problems, as well as a [simple Game of Life implementation](examples/game_of_life.nek).
To run an example via `cargo-run`, use:
```
cargo run --release -- examples/[NAME]
```
# Extras
## Visual Studio Code Language Support
A VSCode extension that provides simple syntax highlighting for nek is also available on
[gitlab](https://code.fbi.h-da.de/advanced-systems-programming-ws21/x4/nek-lang-vscode). Since this
is a very small scale project, the extension was not published and instructions on how to install it
can be found in the mentioned repository.

View File

@@ -7,7 +7,7 @@
sum <- 0; sum <- 0;
i <- 0; i <- 0;
loop i < 1_000; i = i + 1 { loop i < 1_000; i = i + 1 {
if i % 3 == 0 | i % 5 == 0 { if i % 3 == 0 || i % 5 == 0 {
sum = sum + i; sum = sum + i;
} }
} }

View File

@@ -10,14 +10,12 @@ sum <- 0;
a <- 0; a <- 0;
b <- 1; b <- 1;
tmp <- 0;
loop a < 4_000_000 { loop a < 4_000_000 {
if a % 2 == 0 { if a % 2 == 0 {
sum = sum + a; sum = sum + a;
} }
tmp = a; tmp <- a;
a = b; a = b;
b = b + tmp; b = b + tmp;
} }

View File

@@ -18,10 +18,10 @@ loop number > 1 {
div = div + 1; div = div + 1;
if div * div > number { if div * div > number {
if number > 1 & number > result { if number > 1 && number > result {
result = number; result = number;
} }
number = 0; break;
} }
} }

View File

@@ -4,30 +4,25 @@
// //
// Correct Answer: 906609 // Correct Answer: 906609
fun reverse(n) {
rev <- 0;
loop n {
rev = rev * 10 + n % 10;
n = n / 10;
}
return rev;
}
res <- 0; res <- 0;
tmp <- 0;
num <- 0;
num_rev <- 0;
i <- 100; i <- 100;
k <- 100;
loop i < 1_000; i = i + 1 { loop i < 1_000; i = i + 1 {
k = 100; k <- i;
loop k < 1_000; k = k + 1 { loop k < 1_000; k = k + 1 {
num_rev = 0; num <- i * k;
num_rev <- reverse(num);
num = i * k; if num == num_rev && num > res {
tmp = num;
loop tmp {
num_rev = num_rev*10 + tmp % 10;
tmp = tmp / 10;
}
if num == num_rev & num > res {
res = num; res = num;
} }
} }

View File

@@ -4,19 +4,19 @@
# #
# Correct Answer: 906609 # Correct Answer: 906609
def reverse(n):
    """Return ``n`` with its decimal digits in reverse order.

    Intended for non-negative integers (e.g. ``1200`` -> ``21``); ``0`` yields ``0``.
    """
    rev = 0
    while n:
        # Move the lowest digit of n to the end of rev
        rev = rev * 10 + n % 10
        n //= 10
    return rev
res = 0 res = 0
for i in range(100, 999): for i in range(100, 1_000):
for k in range(100, 999): for k in range(i, 1_000):
num = i * k num = i * k
tmp = num num_rev = reverse(num)
num_rev = 0
while tmp != 0:
num_rev = num_rev*10 + tmp % 10
tmp = tmp // 10
if num == num_rev and num > res: if num == num_rev and num > res:
res = num res = num

23
examples/euler5.nek Normal file
View File

@@ -0,0 +1,23 @@
// 2520 is the smallest number that can be divided by each of the numbers from 1 to 10 without any remainder.
// What is the smallest positive number that is evenly divisible by all of the numbers from 1 to 20?
//
// Correct Answer: 232_792_560
// Greatest common divisor of `x` and `y` using the Euclidean algorithm.
// Repeatedly replaces (x, y) with (y, x % y) until y is 0, then returns x.
fun gcd(x, y) {
loop y {
// Keep the old x, it is still needed for the remainder
tmp <- x;
x = y;
y = tmp % y;
}
return x;
}
result <- 1;
i <- 1;
loop i <= 20; i = i + 1 {
result = result * (i / gcd(i, result));
}
print result;

15
examples/euler5.py Normal file
View File

@@ -0,0 +1,15 @@
# 2520 is the smallest number that can be divided by each of the numbers from 1 to 10 without any remainder.
# What is the smallest positive number that is evenly divisible by all of the numbers from 1 to 20?
#
# Correct Answer: 232_792_560
def gcd(x, y):
    """Return the greatest common divisor of ``x`` and ``y`` (Euclidean algorithm)."""
    while y:
        x, y = y, x % y
    return x
result = 1
for i in range(1, 21):
result *= i // gcd(i, result)
print(result)

134
examples/game_of_life.nek Normal file
View File

@@ -0,0 +1,134 @@
// Print the field row by row, followed by an empty line.
// A non-zero cell is printed as `# `, a zero cell as `. `.
fun print_field(field, width, height) {
    y <- 0;
    loop y < height; y = y+1 {
        x <- 0;
        loop x < width; x = x+1 {
            // The field is stored row-major, so one row is `width` cells long
            if field[y*width + x] {
                print "# ";
            } else {
                print ". ";
            }
        }
        print "\n";
    }
    print "\n";
}
// Count the non-zero cells among the (up to 8) direct neighbours of the cell at (x, y).
// The field is indexed row-major as `y*width + x`. Positions outside of the field are
// skipped by the bounds checks, so there is no wrap-around at the borders.
fun count_neighbours(field, x, y, width, height) {
neighbours <- 0;
// Row above, only if there is one
if y > 0 {
if x > 0 {
if field[(y-1)*width + (x-1)] {
// Top left
neighbours = neighbours + 1;
}
}
if field[(y-1)*width + x] {
// Top
neighbours = neighbours + 1;
}
if x < width-1 {
if field[(y-1)*width + (x+1)] {
// Top right
neighbours = neighbours + 1;
}
}
}
// Same row
if x > 0 {
if field[y*width + (x-1)] {
// Left
neighbours = neighbours + 1;
}
}
if x < width-1 {
if field[y*width + (x+1)] {
// Right
neighbours = neighbours + 1;
}
}
// Row below, only if there is one
if y < height-1 {
if x > 0 {
if field[(y+1)*width + (x-1)] {
// Bottom left
neighbours = neighbours + 1;
}
}
if field[(y+1)*width + x] {
// Bottom
neighbours = neighbours + 1;
}
if x < width-1 {
if field[(y+1)*width + (x+1)] {
// Bottom right
neighbours = neighbours + 1;
}
}
}
return neighbours;
}
// Copy the first `len` cells of the array `from` into the array `to`.
// Arrays are passed by reference, so the caller's `to` array is modified.
fun copy(from, to, len) {
i <- 0;
loop i < len; i = i + 1 {
to[i] = from[i];
}
}
// Set the width and height of the field
width <- 10;
height <- 10;
// Create the main and temporary field
field <- [width*height];
field2 <- [width*height];
// Preset the main field with a glider
field[1] = 1;
field[12] = 1;
field[20] = 1;
field[21] = 1;
field[22] = 1;
// Run `num_rounds` generations of the Game of Life.
// Uses the outer variables `field`, `field2`, `width` and `height`. Each round prints the
// current field, calculates the next generation into `field2` and copies it back to `field`.
fun run_gol(num_rounds) {
runs <- 0;
loop runs < num_rounds; runs = runs + 1 {
// Print the field
print_field(field, width, height);
// Calculate next stage from field and store into field2
y <- 0;
loop y < height; y = y+1 {
x <- 0;
loop x < width; x = x+1 {
// Get the neighbours of the current cell
neighbours <- count_neighbours(field, x, y, width, height);
// Set the new cell according to the neighbour count:
// fewer than 2 or more than 3 -> cell is cleared, exactly 3 -> cell is set,
// exactly 2 -> cell keeps its current state
if neighbours < 2 || neighbours > 3 {
field2[y*width + x] = 0;
} else {
if neighbours == 3 {
field2[y*width + x] = 1;
} else {
field2[y*width + x] = field[y*width + x];
}
}
}
}
// Transfer from field2 to field
copy(field2, field, width*height);
}
}
run_gol(32);

View File

@@ -0,0 +1,9 @@
fun fib(n) {
if n <= 1 {
return n;
} else {
return fib(n-1) + fib(n-2);
}
}
print fib(30);

View File

@@ -0,0 +1,6 @@
def fib(n):
    """Return the n-th Fibonacci number (fib(0) == 0, fib(1) == 1), computed recursively."""
    if n <= 1:
        return n
    return fib(n-1) + fib(n-2)
print(fib(30))

View File

@@ -0,0 +1,31 @@
fun square(a) {
return a * a;
}
fun add(a, b) {
return a + b;
}
fun mul(a, b) {
return a * b;
}
// Function with multiple args & nested calls to different functions
fun addmul(a, b, c) {
return mul(add(a, b), c);
}
a <- 10;
b <- 20;
c <- 3;
result <- addmul(a, b, c) + square(c);
// Access and modify outer variable. Argument `a` must not be used from outer var
fun sub_from_result(a) {
result = result - a;
}
sub_from_result(30);
print result;

View File

@@ -1,5 +1,7 @@
use std::rc::Rc; use std::rc::Rc;
use crate::stringstore::{StringStore, Sid};
/// Types for binary operators /// Types for binary operators
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub enum BinOpType { pub enum BinOpType {
@@ -59,9 +61,6 @@ pub enum BinOpType {
/// Assign value to variable /// Assign value to variable
Assign, Assign,
/// Declare new variable with value
Declare,
} }
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
@@ -81,11 +80,18 @@ pub enum Expression {
/// Integer literal (64-bit) /// Integer literal (64-bit)
I64(i64), I64(i64),
/// String literal /// String literal
String(Rc<String>), String(Sid),
/// Array with size
ArrayLiteral(Box<Expression>),
/// Array access with name, stackpos and position
ArrayAccess(Sid, usize, Box<Expression>),
FunCall(Sid, usize, Vec<Expression>),
FunCall(String, Vec<Expression>),
/// Variable /// Variable
Var(String), Var(Sid, usize),
/// Binary operation. Consists of type, left hand side and right hand side /// Binary operation. Consists of type, left hand side and right hand side
BinOp(BinOpType, Box<Expression>, Box<Expression>), BinOp(BinOpType, Box<Expression>, Box<Expression>),
/// Unary operation. Consists of type and operand /// Unary operation. Consists of type and operand
@@ -95,11 +101,11 @@ pub enum Expression {
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub struct Loop { pub struct Loop {
/// The condition that determines if the loop should continue /// The condition that determines if the loop should continue
pub condition: Expression, pub condition: Option<Expression>,
/// This is executed after each loop to advance the condition variables /// This is executed after each loop to advance the condition variables
pub advancement: Option<Expression>, pub advancement: Option<Expression>,
/// The loop body that is executed each loop /// The loop body that is executed each loop
pub body: Ast, pub body: BlockScope,
} }
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
@@ -107,22 +113,69 @@ pub struct If {
/// The condition /// The condition
pub condition: Expression, pub condition: Expression,
/// The body that is executed when condition is true /// The body that is executed when condition is true
pub body_true: Ast, pub body_true: BlockScope,
/// The if body that is executed when the condition is false /// The if body that is executed when the condition is false
pub body_false: Ast, pub body_false: BlockScope,
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct FunDecl {
pub name: Sid,
pub fun_stackpos: usize,
pub argnames: Vec<Sid>,
pub body: Rc<BlockScope>,
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct VarDecl {
pub name: Sid,
pub var_stackpos: usize,
pub rhs: Expression,
} }
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub enum Statement { pub enum Statement {
Return(Expression),
Break,
Continue,
Declaration(VarDecl),
FunDeclare(FunDecl),
Expr(Expression), Expr(Expression),
Block(BlockScope),
Loop(Loop), Loop(Loop),
If(If), If(If),
Print(Expression), Print(Expression),
FunDecl(String, Vec<String>, Ast),
Return(Expression),
} }
#[derive(Debug, PartialEq, Eq, Clone, Default)] pub type BlockScope = Vec<Statement>;
#[derive(Clone, Default)]
pub struct Ast { pub struct Ast {
pub prog: Vec<Statement>, pub stringstore: StringStore,
pub main: BlockScope,
}
impl BinOpType {
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
///
/// The operator precedences are derived from the C language operator precedences. While not all
/// C operators are included or the exact same, the precedence order is the same.
/// See: https://en.cppreference.com/w/c/language/operator_precedence
///
/// The returned values range from 1 (`Assign`, weakest binding) to 11 (`Mul` / `Div` / `Mod`,
/// strongest binding). Operators listed together in one arm share the same precedence.
pub fn precedence(&self) -> u8 {
match self {
BinOpType::Assign => 1,
BinOpType::LOr => 2,
BinOpType::LAnd => 3,
BinOpType::BOr => 4,
BinOpType::BXor => 5,
BinOpType::BAnd => 6,
BinOpType::EquEqu | BinOpType::NotEqu => 7,
BinOpType::Less | BinOpType::LessEqu | BinOpType::Greater | BinOpType::GreaterEqu => 8,
BinOpType::Shl | BinOpType::Shr => 9,
BinOpType::Add | BinOpType::Sub => 10,
BinOpType::Mul | BinOpType::Div | BinOpType::Mod => 11,
}
}
} }

111
src/astoptimizer.rs Normal file
View File

@@ -0,0 +1,111 @@
use crate::ast::{Ast, BlockScope, Expression, If, Loop, Statement, BinOpType, UnOpType, VarDecl};
/// Interface for optimization passes that operate on a whole [`Ast`].
pub trait AstOptimizer {
/// Take ownership of `ast` and return the `Ast` that should be used in its place.
fn optimize(ast: Ast) -> Ast;
}
/// Optimizer that precalculates unary and binary operations whose operands are all integer
/// literals, so they don't have to be evaluated at runtime.
pub struct SimpleAstOptimizer;
impl AstOptimizer for SimpleAstOptimizer {
    /// Run the optimizer over the `main` block of the program and hand the `Ast` back.
    fn optimize(ast: Ast) -> Ast {
        let mut optimized = ast;
        Self::optimize_block(&mut optimized.main);
        optimized
    }
}
impl SimpleAstOptimizer {
fn optimize_block(block: &mut BlockScope) {
for stmt in block {
match stmt {
Statement::Expr(expr) => Self::optimize_expr(expr),
Statement::Block(block) => Self::optimize_block(block),
Statement::Loop(Loop {
condition,
advancement,
body,
}) => {
if let Some(condition) = condition {
Self::optimize_expr(condition);
}
if let Some(advancement) = advancement {
Self::optimize_expr(advancement)
}
Self::optimize_block(body);
}
Statement::If(If {
condition,
body_true,
body_false,
}) => {
Self::optimize_expr(condition);
Self::optimize_block(body_true);
Self::optimize_block(body_false);
}
Statement::Print(expr) => Self::optimize_expr(expr),
Statement::Declaration(VarDecl { name: _, var_stackpos: _, rhs}) => Self::optimize_expr(rhs),
Statement::FunDeclare(_) => (),
Statement::Return(expr) => Self::optimize_expr(expr),
Statement::Break | Statement::Continue => (),
}
}
}
fn optimize_expr(expr: &mut Expression) {
match expr {
Expression::BinOp(bo, lhs, rhs) => {
Self::optimize_expr(lhs);
Self::optimize_expr(rhs);
// Precalculate binary operations that consist of 2 literals. No need to do this at
// runtime, as all parts of the calculation are known at *compiletime* / parsetime.
match (lhs.as_mut(), rhs.as_mut()) {
(Expression::I64(lhs), Expression::I64(rhs)) => {
let new_expr = match bo {
BinOpType::Add => Expression::I64(*lhs + *rhs),
BinOpType::Mul => Expression::I64(*lhs * *rhs),
BinOpType::Sub => Expression::I64(*lhs - *rhs),
BinOpType::Div => Expression::I64(*lhs / *rhs),
BinOpType::Mod => Expression::I64(*lhs % *rhs),
BinOpType::BOr => Expression::I64(*lhs | *rhs),
BinOpType::BAnd => Expression::I64(*lhs & *rhs),
BinOpType::BXor => Expression::I64(*lhs ^ *rhs),
BinOpType::LAnd => Expression::I64(if (*lhs != 0) && (*rhs != 0) { 1 } else { 0 }),
BinOpType::LOr => Expression::I64(if (*lhs != 0) || (*rhs != 0) { 1 } else { 0 }),
BinOpType::Shr => Expression::I64(*lhs >> *rhs),
BinOpType::Shl => Expression::I64(*lhs << *rhs),
BinOpType::EquEqu => Expression::I64(if lhs == rhs { 1 } else { 0 }),
BinOpType::NotEqu => Expression::I64(if lhs != rhs { 1 } else { 0 }),
BinOpType::Less => Expression::I64(if lhs < rhs { 1 } else { 0 }),
BinOpType::LessEqu => Expression::I64(if lhs <= rhs { 1 } else { 0 }),
BinOpType::Greater => Expression::I64(if lhs > rhs { 1 } else { 0 }),
BinOpType::GreaterEqu => Expression::I64(if lhs >= rhs { 1 } else { 0 }),
BinOpType::Assign => unreachable!(),
};
*expr = new_expr;
},
_ => ()
}
}
Expression::UnOp(uo, operand) => {
Self::optimize_expr(operand);
// Precalculate unary operations just like binary ones
match operand.as_mut() {
Expression::I64(val) => {
let new_expr = match uo {
UnOpType::Negate => Expression::I64(-*val),
UnOpType::BNot => Expression::I64(!*val),
UnOpType::LNot => Expression::I64(if *val == 0 { 1 } else { 0 }),
};
*expr = new_expr;
}
_ => (),
}
}
_ => (),
}
}
}

View File

@@ -1,172 +1,439 @@
use std::{collections::HashMap, fmt::Display, rc::Rc, cell::RefCell}; use std::{cell::RefCell, rc::Rc};
use thiserror::Error;
use crate::{ast::{Expression, BinOpType, UnOpType, Ast, Statement, If}, parser::parse, lexer::lex}; use crate::{
ast::{Ast, BinOpType, BlockScope, Expression, FunDecl, If, Statement, UnOpType},
astoptimizer::{AstOptimizer, SimpleAstOptimizer},
lexer::lex,
nice_panic,
parser::parse,
stringstore::{Sid, StringStore},
};
#[derive(Debug, Error)]
pub enum RuntimeError {
#[error("Invalid array Index: {}", 0.to_string())]
InvalidArrayIndex(Value),
#[error("Variable used but not declared: {0}")]
VarUsedNotDeclared(String),
#[error("Can't index into non-array variable: {0}")]
TryingToIndexNonArray(String),
#[error("Invalid value type for unary operation: {}", 0.to_string())]
UnOpInvalidType(Value),
#[error("Incompatible binary operations. Operands don't match: {} {}", 0.to_string(), 1.to_string())]
BinOpIncompatibleTypes(Value, Value),
#[error("Array access out of bounds: Accessed {0}, size is {1}")]
ArrayOutOfBounds(usize, usize),
#[error("Division by zero")]
DivideByZero,
#[error("Invalid number of arguments for function {0}. Expected {1}, got {2}")]
InvalidNumberOfArgs(String, usize, usize),
}
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub enum Value { pub enum Value {
I64(i64), I64(i64),
String(Rc<String>), String(Sid),
Array(Rc<RefCell<Vec<Value>>>),
Void,
} }
pub enum RunEnd { #[derive(Debug, PartialEq, Eq, Clone)]
pub enum BlockExit {
Normal,
Break,
Continue,
Return(Value), Return(Value),
End,
} }
#[derive(Default)]
pub struct Interpreter { pub struct Interpreter {
pub optimize_ast: bool,
pub print_tokens: bool,
pub print_ast: bool,
pub capture_output: bool,
output: Vec<Value>,
// Variable table stores the runtime values of variables // Variable table stores the runtime values of variables
vartable: HashMap<String, Value>, vartable: Vec<Value>,
funtable: HashMap<String, RefCell<(Vec<String>, Ast)>>,
funtable: Vec<FunDecl>,
stringstore: StringStore,
} }
impl Interpreter { impl Interpreter {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
vartable: HashMap::new(), optimize_ast: true,
funtable: HashMap::new(), ..Self::default()
} }
} }
pub fn run_str(&mut self, code: &str, print_tokens: bool, print_ast: bool) { pub fn output(&self) -> &[Value] {
let tokens = lex(code).unwrap(); &self.output
if print_tokens { }
/// Return a clone of the variable stored `idx` slots below the top of the variable stack, or
/// `None` if the stack does not hold that many values.
fn get_var(&self, idx: usize) -> Option<Value> {
    // `idx` is an offset from the end of the stack. Checked subtraction turns an offset that
    // reaches past the bottom of the stack into `None` instead of an integer underflow.
    let pos = self.vartable.len().checked_sub(idx)?.checked_sub(1)?;
    self.vartable.get(pos).cloned()
}
/// Return a mutable reference to the variable stored `idx` slots below the top of the variable
/// stack, or `None` if the stack does not hold that many values.
fn get_var_mut(&mut self, idx: usize) -> Option<&mut Value> {
    // `idx` is an offset from the end of the stack. Checked subtraction turns an offset that
    // reaches past the bottom of the stack into `None` instead of an integer underflow.
    let pos = self.vartable.len().checked_sub(idx)?.checked_sub(1)?;
    self.vartable.get_mut(pos)
}
pub fn run_str(&mut self, code: &str) {
let tokens = match lex(code) {
Ok(tokens) => tokens,
Err(e) => nice_panic!("Lexing error: {}", e),
};
if self.print_tokens {
println!("Tokens: {:?}", tokens); println!("Tokens: {:?}", tokens);
} }
let ast = parse(tokens); let ast = match parse(tokens) {
if print_ast { Ok(ast) => ast,
println!("{:#?}", ast); Err(e) => nice_panic!("Parsing error: {}", e),
};
match self.run_ast(ast) {
Ok(_) => (),
Err(e) => nice_panic!("Runtime error: {}", e),
}
} }
self.run(&ast); pub fn run_ast(&mut self, mut ast: Ast) -> Result<(), RuntimeError> {
if self.optimize_ast {
ast = SimpleAstOptimizer::optimize(ast);
} }
pub fn run(&mut self, prog: &Ast) -> RunEnd { if self.print_ast {
for stmt in &prog.prog { println!("{:#?}", ast.main);
}
self.stringstore = ast.stringstore;
self.run_block(&ast.main)?;
Ok(())
}
/// Run the statements of `prog` with a framepointer offset of 0, see `run_block_fp_offset`.
pub fn run_block(&mut self, prog: &BlockScope) -> Result<BlockExit, RuntimeError> {
self.run_block_fp_offset(prog, 0)
}
pub fn run_block_fp_offset(
&mut self,
prog: &BlockScope,
framepointer_offset: usize,
) -> Result<BlockExit, RuntimeError> {
let framepointer = self.vartable.len() - framepointer_offset;
for stmt in prog {
match stmt { match stmt {
Statement::Expr(expr) => { Statement::Break => return Ok(BlockExit::Break),
self.resolve_expr(expr); Statement::Continue => return Ok(BlockExit::Continue),
}
Statement::Return(expr) => { Statement::Return(expr) => {
return RunEnd::Return(self.resolve_expr(expr)); let val = self.resolve_expr(expr)?;
self.vartable.truncate(framepointer);
return Ok(BlockExit::Return(val));
} }
Statement::Expr(expr) => {
self.resolve_expr(expr)?;
}
Statement::Declaration(decl) => {
let rhs = self.resolve_expr(&decl.rhs)?;
self.vartable.push(rhs);
}
Statement::Block(block) => match self.run_block(block)? {
// Propagate return, continue and break
be @ (BlockExit::Return(_) | BlockExit::Continue | BlockExit::Break) => {
self.vartable.truncate(framepointer);
return Ok(be);
}
_ => (),
},
Statement::Loop(looop) => { Statement::Loop(looop) => {
// loop runs as long condition != 0 // loop runs as long condition != 0
loop { loop {
if matches!(self.resolve_expr(&looop.condition), Value::I64(0)) { if let Some(condition) = &looop.condition {
if matches!(self.resolve_expr(condition)?, Value::I64(0)) {
break; break;
} }
}
match self.run(&looop.body) { let be = self.run_block(&looop.body)?;
RunEnd::Return(val) => return RunEnd::Return(val), match be {
RunEnd::End => (), // Propagate return
be @ BlockExit::Return(_) => {
self.vartable.truncate(framepointer);
return Ok(be);
}
BlockExit::Break => break,
BlockExit::Continue | BlockExit::Normal => (),
} }
if let Some(adv) = &looop.advancement { if let Some(adv) = &looop.advancement {
self.resolve_expr(&adv); self.resolve_expr(&adv)?;
} }
} }
} }
Statement::Print(expr) => { Statement::Print(expr) => {
let result = self.resolve_expr(expr); let result = self.resolve_expr(expr)?;
print!("{}", result);
}
Statement::If(If {condition, body_true, body_false}) => { if self.capture_output {
let end = if matches!(self.resolve_expr(condition), Value::I64(0)) { self.output.push(result)
self.run(body_false)
} else { } else {
self.run(body_true) print!("{}", self.value_to_string(&result));
}
}
Statement::If(If {
condition,
body_true,
body_false,
}) => {
let exit = if matches!(self.resolve_expr(condition)?, Value::I64(0)) {
self.run_block(body_false)?
} else {
self.run_block(body_true)?
}; };
match end {
RunEnd::Return(val) => return RunEnd::Return(val), match exit {
RunEnd::End => (), // Propagate return, continue and break
be @ (BlockExit::Return(_) | BlockExit::Continue | BlockExit::Break) => {
self.vartable.truncate(framepointer);
return Ok(be);
}
_ => (),
} }
} }
Statement::FunDecl(name, args, body) => {
self.funtable.insert(name.clone(), (args.clone(), body.clone()).into()); Statement::FunDeclare(fundec) => {
self.funtable.push(fundec.clone());
} }
} }
} }
RunEnd::End self.vartable.truncate(framepointer);
Ok(BlockExit::Normal)
} }
fn resolve_expr(&mut self, expr: &Expression) -> Value { fn resolve_expr(&mut self, expr: &Expression) -> Result<Value, RuntimeError> {
match expr { let val = match expr {
Expression::I64(val) => Value::I64(*val), Expression::I64(val) => Value::I64(*val),
Expression::ArrayLiteral(size) => {
let size = match self.resolve_expr(size)? {
Value::I64(size) if !size.is_negative() => size,
val => return Err(RuntimeError::InvalidArrayIndex(val)),
};
Value::Array(Rc::new(RefCell::new(vec![Value::I64(0); size as usize])))
}
Expression::String(text) => Value::String(text.clone()), Expression::String(text) => Value::String(text.clone()),
Expression::BinOp(bo, lhs, rhs) => self.resolve_binop(bo, lhs, rhs), Expression::BinOp(bo, lhs, rhs) => self.resolve_binop(bo, lhs, rhs)?,
Expression::UnOp(uo, operand) => self.resolve_unop(uo, operand), Expression::UnOp(uo, operand) => self.resolve_unop(uo, operand)?,
Expression::Var(name) => self.resolve_var(name), Expression::Var(name, idx) => self.resolve_var(*name, *idx)?,
Expression::FunCall(name, args) => { Expression::ArrayAccess(name, idx, arr_idx) => {
let fun = self.funtable.get(name).expect("Function not declared").clone(); self.resolve_array_access(*name, *idx, arr_idx)?
for i in 0 .. args.len() {
let val = self.resolve_expr(&args[i]);
self.vartable.insert(fun.borrow().0[i].clone(), val);
} }
if fun.borrow().0.len() != args.len() { Expression::FunCall(fun_name, fun_stackpos, args) => {
panic!("Invalid number of arguments for function"); let args_len = args.len();
// All of the arg expressions must be resolved before pushing the vars on the stack,
// otherwise the stack positions are incorrect while resolving
let args = args
.iter()
.map(|arg| self.resolve_expr(arg))
.collect::<Vec<_>>();
for arg in args {
self.vartable.push(arg?);
} }
let end = self.run(&fun.borrow().1); // Function existance has been verified in the parser, so unwrap here shouldn't fail
match end { let expected_num_args = self.funtable.get(*fun_stackpos).unwrap().argnames.len();
RunEnd::Return(val) => val,
RunEnd::End => Value::I64(0), if expected_num_args != args_len {
let fun_name = self
.stringstore
.lookup(*fun_name)
.cloned()
.unwrap_or("<unknown>".to_string());
return Err(RuntimeError::InvalidNumberOfArgs(
fun_name,
expected_num_args,
args_len,
));
} }
match self.run_block_fp_offset(
&Rc::clone(&self.funtable.get(*fun_stackpos).unwrap().body),
expected_num_args,
)? {
BlockExit::Normal | BlockExit::Continue | BlockExit::Break => Value::Void,
BlockExit::Return(val) => val,
}
}
};
Ok(val)
}
/// Read a single element of the array stored in the variable at stack offset `idx`.
///
/// `name` is only used to build error messages and `arr_idx` is the expression that yields the
/// element index.
///
/// # Errors
/// - `InvalidArrayIndex` if `arr_idx` does not evaluate to a non-negative integer
/// - `VarUsedNotDeclared` if there is no variable at stack offset `idx`
/// - `TryingToIndexNonArray` if the variable does not hold an array
/// - `ArrayOutOfBounds` if the index is not below the array length
fn resolve_array_access(
    &mut self,
    name: Sid,
    idx: usize,
    arr_idx: &Expression,
) -> Result<Value, RuntimeError> {
    let arr_idx = match self.resolve_expr(arr_idx)? {
        Value::I64(pos) if !pos.is_negative() => pos as usize,
        val => return Err(RuntimeError::InvalidArrayIndex(val)),
    };

    // Resolve the variable name for error messages, only evaluated on the error paths
    let stringstore = &self.stringstore;
    let var_name = || {
        stringstore
            .lookup(name)
            .cloned()
            .unwrap_or_else(|| "<unknown>".to_string())
    };

    let arr = match self.get_var(idx) {
        Some(Value::Array(arr)) => arr,
        Some(_) => return Err(RuntimeError::TryingToIndexNonArray(var_name())),
        None => return Err(RuntimeError::VarUsedNotDeclared(var_name())),
    };

    // The element is only read, so a shared borrow of the array is sufficient
    let arr = arr.borrow();
    arr.get(arr_idx)
        .cloned()
        .ok_or(RuntimeError::ArrayOutOfBounds(arr_idx, arr.len()))
}
fn resolve_var(&mut self, name: Sid, idx: usize) -> Result<Value, RuntimeError> {
match self.get_var(idx) {
Some(val) => Ok(val),
None => {
return Err(RuntimeError::VarUsedNotDeclared(
self.stringstore
.lookup(name)
.cloned()
.unwrap_or_else(|| "<unknown>".to_string()),
))
} }
} }
} }
fn resolve_var(&mut self, name: &str) -> Value { fn resolve_unop(&mut self, uo: &UnOpType, operand: &Expression) -> Result<Value, RuntimeError> {
match self.vartable.get(name) { let operand = self.resolve_expr(operand)?;
Some(val) => val.clone(),
None => panic!("Variable '{}' used but not declared", name),
}
}
fn resolve_unop(&mut self, uo: &UnOpType, operand: &Expression) -> Value { Ok(match (operand, uo) {
let operand = self.resolve_expr(operand);
match (operand, uo) {
(Value::I64(val), UnOpType::Negate) => Value::I64(-val), (Value::I64(val), UnOpType::Negate) => Value::I64(-val),
(Value::I64(val), UnOpType::BNot) => Value::I64(!val), (Value::I64(val), UnOpType::BNot) => Value::I64(!val),
(Value::I64(val), UnOpType::LNot) => Value::I64(if val == 0 { 1 } else { 0 }), (Value::I64(val), UnOpType::LNot) => Value::I64(if val == 0 { 1 } else { 0 }),
_ => panic!("Value type is not compatible with unary operation"), (val, _) => return Err(RuntimeError::UnOpInvalidType(val)),
} })
} }
fn resolve_binop(&mut self, bo: &BinOpType, lhs: &Expression, rhs: &Expression) -> Value { fn resolve_binop(
let rhs = self.resolve_expr(rhs); &mut self,
bo: &BinOpType,
lhs: &Expression,
rhs: &Expression,
) -> Result<Value, RuntimeError> {
let rhs = self.resolve_expr(rhs)?;
match (&bo, &lhs) { match (&bo, &lhs) {
(BinOpType::Declare, Expression::Var(name)) => { (BinOpType::Assign, Expression::Var(name, idx)) => {
self.vartable.insert(name.clone(), rhs.clone()); match self.get_var_mut(*idx) {
return rhs;
}
(BinOpType::Assign, Expression::Var(name)) => {
match self.vartable.get_mut(name) {
Some(val) => *val = rhs.clone(), Some(val) => *val = rhs.clone(),
None => panic!("Runtime Error: Trying to assign value to undeclared variable"), None => {
return Err(RuntimeError::VarUsedNotDeclared(
self.stringstore
.lookup(*name)
.cloned()
.unwrap_or_else(|| "<unknown>".to_string()),
))
} }
return rhs;
} }
_ => () return Ok(rhs);
}
(BinOpType::Assign, Expression::ArrayAccess(name, idx, arr_idx)) => {
let arr_idx = match self.resolve_expr(arr_idx)? {
Value::I64(size) if !size.is_negative() => size,
val => return Err(RuntimeError::InvalidArrayIndex(val)),
};
let val = match self.get_var_mut(*idx) {
Some(val) => val,
None => {
return Err(RuntimeError::VarUsedNotDeclared(
self.stringstore
.lookup(*name)
.cloned()
.unwrap_or_else(|| "<unknown>".to_string()),
))
}
};
match val {
Value::Array(arr) => arr.borrow_mut()[arr_idx as usize] = rhs.clone(),
_ => {
return Err(RuntimeError::TryingToIndexNonArray(
self.stringstore
.lookup(*name)
.cloned()
.unwrap_or_else(|| "<unknown>".to_string()),
))
}
} }
let lhs = self.resolve_expr(lhs); return Ok(rhs);
}
_ => (),
}
match (lhs, rhs) { let lhs = self.resolve_expr(lhs)?;
let result = match (lhs, rhs) {
(Value::I64(lhs), Value::I64(rhs)) => match bo { (Value::I64(lhs), Value::I64(rhs)) => match bo {
BinOpType::Add => Value::I64(lhs + rhs), BinOpType::Add => Value::I64(lhs + rhs),
BinOpType::Mul => Value::I64(lhs * rhs), BinOpType::Mul => Value::I64(lhs * rhs),
BinOpType::Sub => Value::I64(lhs - rhs), BinOpType::Sub => Value::I64(lhs - rhs),
BinOpType::Div => Value::I64(lhs / rhs), BinOpType::Div => {
BinOpType::Mod => Value::I64(lhs % rhs), Value::I64(lhs.checked_div(rhs).ok_or(RuntimeError::DivideByZero)?)
}
BinOpType::Mod => {
Value::I64(lhs.checked_rem(rhs).ok_or(RuntimeError::DivideByZero)?)
}
BinOpType::BOr => Value::I64(lhs | rhs), BinOpType::BOr => Value::I64(lhs | rhs),
BinOpType::BAnd => Value::I64(lhs & rhs), BinOpType::BAnd => Value::I64(lhs & rhs),
BinOpType::BXor => Value::I64(lhs ^ rhs), BinOpType::BXor => Value::I64(lhs ^ rhs),
@@ -181,27 +448,33 @@ impl Interpreter {
BinOpType::Greater => Value::I64(if lhs > rhs { 1 } else { 0 }), BinOpType::Greater => Value::I64(if lhs > rhs { 1 } else { 0 }),
BinOpType::GreaterEqu => Value::I64(if lhs >= rhs { 1 } else { 0 }), BinOpType::GreaterEqu => Value::I64(if lhs >= rhs { 1 } else { 0 }),
BinOpType::Declare | BinOpType::Assign => unreachable!(), BinOpType::Assign => unreachable!(),
}, },
_ => panic!("Value types are not compatible"), (lhs, rhs) => return Err(RuntimeError::BinOpIncompatibleTypes(lhs, rhs)),
} };
}
Ok(result)
} }
impl Display for Value { fn value_to_string(&self, val: &Value) -> String {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match val {
match self { Value::I64(val) => format!("{}", val),
Value::I64(val) => write!(f, "{}", val), Value::Array(val) => format!("{:?}", val.borrow()),
Value::String(text) => write!(f, "{}", text), Value::String(text) => format!(
"{}",
self.stringstore
.lookup(*text)
.unwrap_or(&"<invalid string>".to_string())
),
Value::Void => format!("void"),
} }
} }
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::{Interpreter, Value}; use super::{Interpreter, Value};
use crate::ast::{Expression, BinOpType}; use crate::ast::{BinOpType, Expression};
#[test] #[test]
fn test_interpreter_expr() { fn test_interpreter_expr() {
@@ -212,7 +485,12 @@ mod test {
Expression::BinOp( Expression::BinOp(
BinOpType::Add, BinOpType::Add,
Expression::I64(1).into(), Expression::I64(1).into(),
Expression::BinOp(BinOpType::Mul, Expression::I64(2).into(), Expression::I64(3).into()).into(), Expression::BinOp(
BinOpType::Mul,
Expression::I64(2).into(),
Expression::I64(3).into(),
)
.into(),
) )
.into(), .into(),
Expression::I64(4).into(), Expression::I64(4).into(),
@@ -221,7 +499,7 @@ mod test {
let expected = Value::I64(11); let expected = Value::I64(11);
let mut interpreter = Interpreter::new(); let mut interpreter = Interpreter::new();
let actual = interpreter.resolve_expr(&ast); let actual = interpreter.resolve_expr(&ast).unwrap();
assert_eq!(expected, actual); assert_eq!(expected, actual);
} }

View File

@@ -1,8 +1,8 @@
use crate::token::Token;
use anyhow::Result;
use std::{iter::Peekable, str::Chars}; use std::{iter::Peekable, str::Chars};
use thiserror::Error; use thiserror::Error;
use crate::{token::Token, T};
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum LexErr { pub enum LexErr {
#[error("Failed to parse '{0}' as i64")] #[error("Failed to parse '{0}' as i64")]
@@ -20,98 +20,101 @@ pub enum LexErr {
/// Lex the provided code into a Token Buffer /// Lex the provided code into a Token Buffer
pub fn lex(code: &str) -> Result<Vec<Token>, LexErr> { pub fn lex(code: &str) -> Result<Vec<Token>, LexErr> {
let mut lexer = Lexer::new(code); let lexer = Lexer::new(code);
lexer.lex() lexer.lex()
} }
struct Lexer<'a> { struct Lexer<'a> {
/// The sourcecode text as an iterator over the chars /// The sourcecode text as an iterator over the chars
code: Peekable<Chars<'a>>, code: Peekable<Chars<'a>>,
/// The lexed tokens
tokens: Vec<Token>,
/// The sourcecode character that is currently being lexed
current_char: char,
} }
impl<'a> Lexer<'a> { impl<'a> Lexer<'a> {
fn new(code: &'a str) -> Self { fn new(code: &'a str) -> Self {
let code = code.chars().peekable(); let code = code.chars().peekable();
Self { code } let tokens = Vec::new();
let current_char = '\0';
Self {
code,
tokens,
current_char,
}
} }
fn lex(&mut self) -> Result<Vec<Token>, LexErr> { fn lex(mut self) -> Result<Vec<Token>, LexErr> {
let mut tokens = Vec::new();
loop { loop {
match self.next() { self.current_char = self.next();
match (self.current_char, self.peek()) {
// Stop lexing at EOF // Stop lexing at EOF
'\0' => break, ('\0', _) => break,
// Skip whitespace // Skip whitespace
' ' | '\t' | '\n' | '\r' => (), (' ' | '\t' | '\n' | '\r', _) => (),
// Line comment. Consume every char until linefeed (next line) // Line comment. Consume every char until linefeed (next line)
'/' if matches!(self.peek(), '/') => while !matches!(self.next(), '\n' | '\0') {}, ('/', '/') => while !matches!(self.next(), '\n' | '\0') {},
// Double character tokens // Double character tokens
'>' if matches!(self.peek(), '>') => { ('>', '>') => self.push_tok_consume(T![>>]),
self.next(); ('<', '<') => self.push_tok_consume(T![<<]),
tokens.push(Token::Shr); ('=', '=') => self.push_tok_consume(T![==]),
} ('!', '=') => self.push_tok_consume(T![!=]),
'<' if matches!(self.peek(), '<') => { ('<', '=') => self.push_tok_consume(T![<=]),
self.next(); ('>', '=') => self.push_tok_consume(T![>=]),
tokens.push(Token::Shl); ('<', '-') => self.push_tok_consume(T![<-]),
} ('&', '&') => self.push_tok_consume(T![&&]),
'=' if matches!(self.peek(), '=') => { ('|', '|') => self.push_tok_consume(T![||]),
self.next();
tokens.push(Token::EquEqu);
}
'!' if matches!(self.peek(), '=') => {
self.next();
tokens.push(Token::NotEqu);
}
'<' if matches!(self.peek(), '=') => {
self.next();
tokens.push(Token::LAngleEqu);
}
'>' if matches!(self.peek(), '=') => {
self.next();
tokens.push(Token::RAngleEqu);
}
'<' if matches!(self.peek(), '-') => {
self.next();
tokens.push(Token::LArrow);
}
'&' if matches!(self.peek(), '&') => {
self.next();
tokens.push(Token::LAnd);
}
'|' if matches!(self.peek(), '|') => {
self.next();
tokens.push(Token::LOr);
}
// Single character tokens // Single character tokens
';' => tokens.push(Token::Semicolon), (',', _) => self.push_tok(T![,]),
'+' => tokens.push(Token::Add), (';', _) => self.push_tok(T![;]),
'-' => tokens.push(Token::Sub), ('+', _) => self.push_tok(T![+]),
'*' => tokens.push(Token::Mul), ('-', _) => self.push_tok(T![-]),
'/' => tokens.push(Token::Div), ('*', _) => self.push_tok(T![*]),
'%' => tokens.push(Token::Mod), ('/', _) => self.push_tok(T![/]),
'|' => tokens.push(Token::BOr), ('%', _) => self.push_tok(T![%]),
'&' => tokens.push(Token::BAnd), ('|', _) => self.push_tok(T![|]),
'^' => tokens.push(Token::BXor), ('&', _) => self.push_tok(T![&]),
'(' => tokens.push(Token::LParen), ('^', _) => self.push_tok(T![^]),
')' => tokens.push(Token::RParen), ('(', _) => self.push_tok(T!['(']),
'~' => tokens.push(Token::Tilde), (')', _) => self.push_tok(T![')']),
'<' => tokens.push(Token::LAngle), ('~', _) => self.push_tok(T![~]),
'>' => tokens.push(Token::RAngle), ('<', _) => self.push_tok(T![<]),
'=' => tokens.push(Token::Equ), ('>', _) => self.push_tok(T![>]),
'{' => tokens.push(Token::LBraces), ('=', _) => self.push_tok(T![=]),
'}' => tokens.push(Token::RBraces), ('{', _) => self.push_tok(T!['{']),
'!' => tokens.push(Token::LNot), ('}', _) => self.push_tok(T!['}']),
',' => tokens.push(Token::Comma), ('!', _) => self.push_tok(T![!]),
('[', _) => self.push_tok(T!['[']),
(']', _) => self.push_tok(T![']']),
// Lex numbers // Special tokens with variable length
ch @ '0'..='9' => {
// Lex multiple characters together as numbers
('0'..='9', _) => self.lex_number()?,
// Lex multiple characters together as a string
('"', _) => self.lex_str()?,
// Lex multiple characters together as identifier
('a'..='z' | 'A'..='Z' | '_', _) => self.lex_identifier()?,
(ch, _) => Err(LexErr::UnexpectedChar(ch))?,
}
}
Ok(self.tokens)
}
/// Lex multiple characters as a number until encountering a non numeric digit. The
/// successfully lexed i64 literal token is appended to the stored tokens.
fn lex_number(&mut self) -> Result<(), LexErr> {
// String representation of the integer value // String representation of the integer value
let mut sval = String::from(ch); let mut sval = String::from(self.current_char);
// Do as long as a next char exists and it is a numeric char // Do as long as a next char exists and it is a numeric char
loop { loop {
@@ -131,11 +134,15 @@ impl<'a> Lexer<'a> {
// Try to convert the string representation of the value to i64 // Try to convert the string representation of the value to i64
let i64val = sval.parse().map_err(|_| LexErr::NumericParse(sval))?; let i64val = sval.parse().map_err(|_| LexErr::NumericParse(sval))?;
tokens.push(Token::I64(i64val));
self.push_tok(T![i64(i64val)]);
Ok(())
} }
// Lex a string /// Lex characters as a string until encountering an unescaped closing doublequoute char '"'.
'"' => { /// The successfully lexed string literal token is appended to the stored tokens.
fn lex_str(&mut self) -> Result<(), LexErr> {
// Opening " was consumed in match // Opening " was consumed in match
let mut text = String::new(); let mut text = String::new();
@@ -165,12 +172,15 @@ impl<'a> Lexer<'a> {
// Consume closing " // Consume closing "
self.next(); self.next();
tokens.push(Token::String(text)) self.push_tok(T![str(text)]);
Ok(())
} }
// Lex characters as identifier /// Lex characters from the text as an identifier. The successfully lexed ident or keyword
ch @ ('a'..='z' | 'A'..='Z' | '_') => { /// token is appended to the stored tokens.
let mut ident = String::from(ch); fn lex_identifier(&mut self) -> Result<(), LexErr> {
let mut ident = String::from(self.current_char);
// Do as long as a next char exists and it is a valid char for an identifier // Do as long as a next char exists and it is a valid char for an identifier
loop { loop {
@@ -186,25 +196,33 @@ impl<'a> Lexer<'a> {
// Check for pre-defined keywords // Check for pre-defined keywords
let token = match ident.as_str() { let token = match ident.as_str() {
"loop" => Token::Loop, "loop" => T![loop],
"print" => Token::Print, "print" => T![print],
"if" => Token::If, "if" => T![if],
"else" => Token::Else, "else" => T![else],
"fun" => Token::Fun, "fun" => T![fun],
"return" => Token::Return, "return" => T![return],
"break" => T![break],
"continue" => T![continue],
// If it doesn't match a keyword, it is a normal identifier // If it doesn't match a keyword, it is a normal identifier
_ => Token::Ident(ident), _ => T![ident(ident)],
}; };
tokens.push(token); self.push_tok(token);
Ok(())
} }
ch => Err(LexErr::UnexpectedChar(ch))?, /// Push the given token into the stored tokens
} fn push_tok(&mut self, token: Token) {
self.tokens.push(token);
} }
Ok(tokens) /// Same as `push_tok` but also consumes the next token, removing it from the code iter
fn push_tok_consume(&mut self, token: Token) {
self.next();
self.tokens.push(token);
} }
/// Advance to next character and return the removed char /// Advance to next character and return the removed char
@@ -220,31 +238,51 @@ impl<'a> Lexer<'a> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{lex, Token}; use crate::{lexer::lex, T};
#[test] #[test]
fn test_lexer() { fn test_lexer() {
let code = "33 +5*2 + 4456467*2334+3 % - / << ^ | & >>"; let code = r#"53+1-567_000 * / % | ~ ! < > & ^ ({[]});= <- >= <=
== != && || << >> loop if else print my_123var "hello \t world\r\n\"\\""#;
let expected = vec![ let expected = vec![
Token::I64(33), T![i64(53)],
Token::Add, T![+],
Token::I64(5), T![i64(1)],
Token::Mul, T![-],
Token::I64(2), T![i64(567_000)],
Token::Add, T![*],
Token::I64(4456467), T![/],
Token::Mul, T![%],
Token::I64(2334), T![|],
Token::Add, T![~],
Token::I64(3), T![!],
Token::Mod, T![<],
Token::Sub, T![>],
Token::Div, T![&],
Token::Shl, T![^],
Token::BXor, T!['('],
Token::BOr, T!['{'],
Token::BAnd, T!['['],
Token::Shr, T![']'],
T!['}'],
T![')'],
T![;],
T![=],
T![<-],
T![>=],
T![<=],
T![==],
T![!=],
T![&&],
T![||],
T![<<],
T![>>],
T![loop],
T![if],
T![else],
T![print],
T![ident("my_123var".to_string())],
T![str("hello \t world\r\n\"\\".to_string())],
]; ];
let actual = lex(code).unwrap(); let actual = lex(code).unwrap();

View File

@@ -1,5 +1,61 @@
pub mod lexer;
pub mod token;
pub mod parser;
pub mod ast; pub mod ast;
pub mod interpreter; pub mod interpreter;
pub mod lexer;
pub mod parser;
pub mod token;
pub mod stringstore;
pub mod astoptimizer;
pub mod util;
#[cfg(test)]
mod tests {
use crate::interpreter::{Interpreter, Value};
use std::fs::read_to_string;
/// Run the program `examples/<filename>` with output capturing enabled and assert that the
/// captured output consists of exactly one value, the integer `correct_result`.
fn run_example_check_single_i64_output(filename: &str, correct_result: i64) {
let mut interpreter = Interpreter::new();
// Collect printed values inside the interpreter so they can be compared below
interpreter.capture_output = true;
// Panics if the example file can't be read
let code = read_to_string(format!("examples/{filename}")).unwrap();
interpreter.run_str(&code);
let expected_output = [Value::I64(correct_result)];
assert_eq!(interpreter.output(), &expected_output);
}
// Each test runs one example program and checks its single integer output
#[test]
fn test_euler1() {
run_example_check_single_i64_output("euler1.nek", 233168);
}
#[test]
fn test_euler2() {
run_example_check_single_i64_output("euler2.nek", 4613732);
}
#[test]
fn test_euler3() {
run_example_check_single_i64_output("euler3.nek", 6857);
}
#[test]
fn test_euler4() {
run_example_check_single_i64_output("euler4.nek", 906609);
}
#[test]
fn test_euler5() {
run_example_check_single_i64_output("euler5.nek", 232792560);
}
#[test]
fn test_recursive_fib() {
run_example_check_single_i64_output("recursive_fib.nek", 832040);
}
#[test]
fn test_functions() {
run_example_check_single_i64_output("test_functions.nek", 69);
}
}

View File

@@ -1,18 +1,16 @@
use std::{env::args, fs, io::{stdout, Write, stdin}}; use std::{env::args, fs, process::exit};
use nek_lang::interpreter::Interpreter;
use nek_lang::{interpreter::Interpreter, nice_panic};
#[derive(Debug, Default)] #[derive(Debug, Default)]
struct CliConfig { struct CliConfig {
print_tokens: bool, print_tokens: bool,
print_ast: bool, print_ast: bool,
interactive: bool, no_optimizations: bool,
file: Option<String>, file: Option<String>,
} }
fn main() { fn main() {
let mut conf = CliConfig::default(); let mut conf = CliConfig::default();
// Go through all commandline arguments except the first (filename) // Go through all commandline arguments except the first (filename)
@@ -20,36 +18,39 @@ fn main() {
match arg.as_str() { match arg.as_str() {
"--token" | "-t" => conf.print_tokens = true, "--token" | "-t" => conf.print_tokens = true,
"--ast" | "-a" => conf.print_ast = true, "--ast" | "-a" => conf.print_ast = true,
"--interactive" | "-i" => conf.interactive = true, "--no-opt" | "-n" => conf.no_optimizations = true,
file if conf.file.is_none() => conf.file = Some(file.to_string()), "--help" | "-h" => print_help(),
_ => panic!("Invalid argument: '{}'", arg), file if !arg.starts_with("-") && conf.file.is_none() => {
conf.file = Some(file.to_string())
}
_ => nice_panic!("Error: Invalid argument '{}'", arg),
} }
} }
let mut interpreter = Interpreter::new(); let mut interpreter = Interpreter::new();
interpreter.print_tokens = conf.print_tokens;
interpreter.print_ast = conf.print_ast;
interpreter.optimize_ast = !conf.no_optimizations;
if let Some(file) = &conf.file { if let Some(file) = &conf.file {
let code = fs::read_to_string(file).expect(&format!("File not found: '{}'", file)); let code = match fs::read_to_string(file) {
interpreter.run_str(&code, conf.print_tokens, conf.print_ast); Ok(code) => code,
} Err(_) => nice_panic!("Error: Could not read file '{}'", file),
};
if conf.interactive || conf.file.is_none() { interpreter.run_str(&code);
let mut code = String::new(); } else {
println!("Error: No file given\n");
loop { print_help();
print!(">> "); }
stdout().flush().unwrap();
code.clear();
stdin().read_line(&mut code).unwrap();
if code.trim() == "exit" {
break;
}
interpreter.run_str(&code, conf.print_tokens, conf.print_ast);
}
} }
fn print_help() {
println!("Usage nek-lang [FLAGS] [FILE]");
println!("FLAGS: ");
println!("-t, --token Print the lexed tokens");
println!("-a, --ast Print the abstract syntax tree");
println!("-n, --no-opt Disable the AST optimizations");
println!("-h, --help Show this help screen");
exit(0);
} }

View File

@@ -1,215 +1,328 @@
use std::iter::Peekable; use thiserror::Error;
use crate::ast::*; use crate::{
use crate::token::Token; ast::{Ast, BlockScope, Expression, FunDecl, If, Loop, Statement, VarDecl},
stringstore::{Sid, StringStore},
token::Token,
util::{PutBackIter, PutBackableExt},
T,
};
#[derive(Debug, Error)]
pub enum ParseErr {
#[error("Unexpected Token \"{0:?}\", expected \"{1}\"")]
UnexpectedToken(Token, String),
#[error("Left hand side of declaration is not a variable")]
DeclarationOfNonVar,
#[error("Use of undefined variable \"{0}\"")]
UseOfUndeclaredVar(String),
#[error("Use of undefined function \"{0}\"")]
UseOfUndeclaredFun(String),
#[error("Redeclation of function \"{0}\"")]
RedeclarationFun(String),
#[error("Function not declared at top level \"{0}\"")]
FunctionOnNonTopLevel(String),
}
type ResPE<T> = Result<T, ParseErr>;
/// Consume the next token via `$self.next()` and check it against the pattern `$expected_tok`.
///
/// If the token does not match, the enclosing function returns early with
/// `Err(ParseErr::UnexpectedToken(..))`, carrying the offending token and `$expected_str`
/// (converted to a `String`) as the description of what was expected.
macro_rules! validate_next {
    ($self:ident, $expected_tok:pat, $expected_str:expr) => {
        match $self.next() {
            $expected_tok => (),
            tok => return Err(ParseErr::UnexpectedToken(tok, $expected_str.to_string())),
        }
    };
}
/// Convenience entry point: wrap `tokens` in a fresh `Parser` and run it, yielding the
/// resulting abstract syntax tree or a `ParseErr`.
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> ResPE<Ast> {
    Parser::new(tokens).parse()
}
struct Parser<T: Iterator<Item = Token>> { struct Parser<T: Iterator<Item = Token>> {
tokens: Peekable<T>, tokens: PutBackIter<T>,
string_store: StringStore,
var_stack: Vec<Sid>,
fun_stack: Vec<Sid>,
nesting_level: usize,
} }
impl<T: Iterator<Item = Token>> Parser<T> { impl<T: Iterator<Item = Token>> Parser<T> {
/// Create a new parser to parse the given Token Stream /// Create a new parser to parse the given Token Stream
fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self { pub fn new<A: IntoIterator<IntoIter = T>>(tokens: A) -> Self {
let tokens = tokens.into_iter().peekable(); let tokens = tokens.into_iter().putbackable();
Self { tokens } let string_store = StringStore::new();
let var_stack = Vec::new();
let fun_stack = Vec::new();
Self {
tokens,
string_store,
var_stack,
fun_stack,
nesting_level: 0,
}
} }
fn parse(&mut self) -> Ast { pub fn parse(mut self) -> ResPE<Ast> {
let main = self.parse_scoped_block()?;
Ok(Ast {
main,
stringstore: self.string_store,
})
}
fn parse_scoped_block(&mut self) -> ResPE<BlockScope> {
self.parse_scoped_block_fp_offset(0)
}
/// Parse tokens into an abstract syntax tree. This will continuously parse statements until
/// encountering end-of-file or a block end '}' .
fn parse_scoped_block_fp_offset(&mut self, framepoint_offset: usize) -> ResPE<BlockScope> {
self.nesting_level += 1;
let framepointer = self.var_stack.len() - framepoint_offset;
let mut prog = Vec::new(); let mut prog = Vec::new();
loop { loop {
match self.peek() { match self.peek() {
Token::Semicolon => { T![;] => {
self.next(); self.next();
} }
Token::EoF => break,
Token::RBraces => { T![EoF] | T!['}'] => break,
break;
T!['{'] => {
self.next();
prog.push(Statement::Block(self.parse_scoped_block()?));
validate_next!(self, T!['}'], "}");
} }
// By default try to lex a statement // By default try to lex a statement
_ => prog.push(self.parse_stmt()), _ => prog.push(self.parse_stmt()?),
} }
} }
Ast { prog } self.var_stack.truncate(framepointer);
self.nesting_level -= 1;
Ok(prog)
} }
fn parse_stmt(&mut self) -> Statement { /// Parse a single statement from the tokens.
match self.peek() { fn parse_stmt(&mut self) -> ResPE<Statement> {
Token::Loop => Statement::Loop(self.parse_loop()), let stmt = match self.peek() {
T![break] => {
Token::Print => {
self.next(); self.next();
let expr = self.parse_expr(); validate_next!(self, T![;], ";");
Statement::Break
}
T![continue] => {
self.next();
validate_next!(self, T![;], ";");
Statement::Continue
}
T![loop] => Statement::Loop(self.parse_loop()?),
T![print] => {
self.next();
let expr = self.parse_expr()?;
// After a statement, there must be a semicolon // After a statement, there must be a semicolon
if !matches!(self.next(), Token::Semicolon) { validate_next!(self, T![;], ";");
panic!("Expected semicolon after statement");
}
Statement::Print(expr) Statement::Print(expr)
} }
Token::Return => { T![return] => {
self.next(); self.next();
let stmt = Statement::Return(self.parse_expr()?);
let expr = self.parse_expr();
// After a statement, there must be a semicolon // After a statement, there must be a semicolon
if !matches!(self.next(), Token::Semicolon) { validate_next!(self, T![;], ";");
panic!("Expected semicolon after statement");
}
Statement::Return(expr)
}
Token::If => Statement::If(self.parse_if()),
Token::Fun => {
self.next();
let name = match self.next() {
Token::Ident(name) => name,
_ => panic!("Error lexing function: Expected ident token"),
};
let mut args = Vec::new();
if !matches!(self.next(), Token::LParen) {
panic!("Expected opening parenthesis");
}
while self.peek() != &Token::RParen {
let argname = match self.next() {
Token::Ident(argname) => argname,
_ => panic!("Error lexing function: Expected ident token for argname"),
};
args.push(argname);
if self.peek() == &Token::Comma {
self.next();
}
}
self.next();
if !matches!(self.next(), Token::LBraces) {
panic!("Expected opening braces");
}
let body = self.parse();
if !matches!(self.next(), Token::RBraces) {
panic!("Expected closing braces");
}
Statement::FunDecl(name, args, body)
}
// If it is not a loop, try to lex as an expression
_ => {
let stmt = Statement::Expr(self.parse_expr());
// After a statement, there must be a semicolon
if !matches!(self.next(), Token::Semicolon) {
panic!("Expected semicolon after statement");
}
stmt stmt
} }
}
}
fn parse_if(&mut self) -> If { T![if] => Statement::If(self.parse_if()?),
if !matches!(self.next(), Token::If) {
panic!("Error lexing if: Expected if token");
}
let condition = self.parse_expr(); T![fun] => {
if !matches!(self.next(), Token::LBraces) {
panic!("Error lexing if: Expected '{{'")
}
let body_true = self.parse();
if !matches!(self.next(), Token::RBraces) {
panic!("Error lexing if: Expected '}}'")
}
let mut body_false = Ast::default();
if matches!(self.peek(), Token::Else) {
self.next(); self.next();
if !matches!(self.next(), Token::LBraces) { let fun_name = match self.next() {
panic!("Error lexing if: Expected '{{'") T![ident(fun_name)] => fun_name,
tok => return Err(ParseErr::UnexpectedToken(tok, "<ident>".to_string())),
};
if self.nesting_level > 1 {
return Err(ParseErr::FunctionOnNonTopLevel(fun_name));
} }
body_false = self.parse(); let fun_name = self.string_store.intern_or_lookup(&fun_name);
if !matches!(self.next(), Token::RBraces) { if self.fun_stack.contains(&fun_name) {
panic!("Error lexing if: Expected '}}'") return Err(ParseErr::RedeclarationFun(
self.string_store
.lookup(fun_name)
.cloned()
.unwrap_or("<unknown>".to_string()),
));
}
let fun_stackpos = self.fun_stack.len();
self.fun_stack.push(fun_name);
let mut arg_names = Vec::new();
validate_next!(self, T!['('], "(");
while matches!(self.peek(), T![ident(_)]) {
let var_name = match self.next() {
T![ident(var_name)] => var_name,
_ => unreachable!(),
};
let var_name = self.string_store.intern_or_lookup(&var_name);
arg_names.push(var_name);
// Push the variable onto the varstack
self.var_stack.push(var_name);
// If there are more args skip the comma so that the loop will read the argname
if self.peek() == &T![,] {
self.next();
} }
} }
If { validate_next!(self, T![')'], ")");
validate_next!(self, T!['{'], "{");
// Create the scoped block with a stack offset. This will pop the args that are
// added to the stack while parsing args
let body = self.parse_scoped_block_fp_offset(arg_names.len())?;
validate_next!(self, T!['}'], "}");
Statement::FunDeclare(FunDecl {
name: fun_name,
fun_stackpos,
argnames: arg_names,
body: body.into(),
})
}
_ => {
let first = self.next();
let stmt = match (first, self.peek()) {
(T![ident(name)], T![<-]) => {
self.next();
let rhs = self.parse_expr()?;
let sid = self.string_store.intern_or_lookup(&name);
let sp = self.var_stack.len();
self.var_stack.push(sid);
Statement::Declaration(VarDecl {
name: sid,
var_stackpos: sp,
rhs,
})
}
(first, _) => {
self.putback(first);
Statement::Expr(self.parse_expr()?)
}
};
// After a statement, there must be a semicolon
validate_next!(self, T![;], ";");
stmt
}
};
Ok(stmt)
}
/// Parse an if statement from the tokens
fn parse_if(&mut self) -> ResPE<If> {
validate_next!(self, T![if], "if");
let condition = self.parse_expr()?;
validate_next!(self, T!['{'], "{");
let body_true = self.parse_scoped_block()?;
validate_next!(self, T!['}'], "}");
let mut body_false = BlockScope::default();
if self.peek() == &T![else] {
self.next();
validate_next!(self, T!['{'], "{");
body_false = self.parse_scoped_block()?;
validate_next!(self, T!['}'], "}");
}
Ok(If {
condition, condition,
body_true, body_true,
body_false, body_false,
} })
} }
fn parse_loop(&mut self) -> Loop { /// Parse a loop statement from the tokens
if !matches!(self.next(), Token::Loop) { fn parse_loop(&mut self) -> ResPE<Loop> {
panic!("Error lexing loop: Expected loop token"); validate_next!(self, T![loop], "loop");
}
let condition = self.parse_expr(); let mut condition = None;
let mut advancement = None; let mut advancement = None;
let body; if !matches!(self.peek(), T!['{']) {
condition = Some(self.parse_expr()?);
match self.next() { if matches!(self.peek(), T![;]) {
Token::LBraces => { self.next();
body = self.parse(); advancement = Some(self.parse_expr()?);
}
} }
Token::Semicolon => { validate_next!(self, T!['{'], "{");
advancement = Some(self.parse_expr());
if !matches!(self.next(), Token::LBraces) { let body = self.parse_scoped_block()?;
panic!("Error lexing loop: Expected '{{'")
}
body = self.parse(); validate_next!(self, T!['}'], "}");
}
_ => panic!("Error lexing loop: Expected ';' or '{{'"), Ok(Loop {
}
if !matches!(self.next(), Token::RBraces) {
panic!("Error lexing loop: Expected '}}'")
}
Loop {
condition, condition,
advancement, advancement,
body, body,
} })
} }
fn parse_expr(&mut self) -> Expression { /// Parse a single expression from the tokens
let lhs = self.parse_primary(); fn parse_expr(&mut self) -> ResPE<Expression> {
let lhs = self.parse_primary()?;
self.parse_expr_precedence(lhs, 0) self.parse_expr_precedence(lhs, 0)
} }
/// Parse binary expressions with a precedence equal to or higher than min_prec /// Parse binary expressions with a precedence equal to or higher than min_prec
fn parse_expr_precedence(&mut self, mut lhs: Expression, min_prec: u8) -> Expression { fn parse_expr_precedence(&mut self, mut lhs: Expression, min_prec: u8) -> ResPE<Expression> {
while let Some(binop) = &self.peek().try_to_binop() { while let Some(binop) = &self.peek().try_to_binop() {
// Stop if the next operator has a lower binding power // Stop if the next operator has a lower binding power
if !(binop.precedence() >= min_prec) { if !(binop.precedence() >= min_prec) {
@@ -220,150 +333,170 @@ impl<T: Iterator<Item = Token>> Parser<T> {
// valid // valid
let binop = self.next().try_to_binop().unwrap(); let binop = self.next().try_to_binop().unwrap();
let mut rhs = self.parse_primary(); let mut rhs = self.parse_primary()?;
while let Some(binop2) = &self.peek().try_to_binop() { while let Some(binop2) = &self.peek().try_to_binop() {
if !(binop2.precedence() > binop.precedence()) { if !(binop2.precedence() > binop.precedence()) {
break; break;
} }
rhs = self.parse_expr_precedence(rhs, binop.precedence() + 1); rhs = self.parse_expr_precedence(rhs, binop.precedence() + 1)?;
} }
lhs = Expression::BinOp(binop, lhs.into(), rhs.into()); lhs = Expression::BinOp(binop, lhs.into(), rhs.into());
} }
lhs Ok(lhs)
} }
/// Parse a primary expression (for now only number) /// Parse a primary expression (for now only number)
fn parse_primary(&mut self) -> Expression { fn parse_primary(&mut self) -> ResPE<Expression> {
match self.next() { let primary = match self.next() {
// Literal i64 // Literal i64
Token::I64(val) => Expression::I64(val), T![i64(val)] => Expression::I64(val),
// Literal String // Literal String
Token::String(text) => Expression::String(text.into()), T![str(text)] => Expression::String(self.string_store.intern_or_lookup(&text)),
Token::Ident(name) if matches!(self.peek(), Token::LParen) => self.parse_funcall(name), // Array literal. Square brackets containing the array size as expression
T!['['] => {
let size = self.parse_expr()?;
Token::Ident(name) => Expression::Var(name), validate_next!(self, T![']'], "]");
Expression::ArrayLiteral(size.into())
}
// Array sccess, aka indexing. An ident followed by square brackets containing the
// index as an expression
T![ident(name)] if self.peek() == &T!['['] => {
let sid = self.string_store.intern_or_lookup(&name);
let stackpos = self.get_stackpos(sid)?;
self.next();
let index = self.parse_expr()?;
validate_next!(self, T![']'], "]");
Expression::ArrayAccess(sid, stackpos, index.into())
}
T![ident(name)] if self.peek() == &T!['('] => {
// Skip the opening parenthesis
self.next();
let sid = self.string_store.intern_or_lookup(&name);
let mut args = Vec::new();
while !matches!(self.peek(), T![')']) {
let arg = self.parse_expr()?;
args.push(arg);
// If there are more args skip the comma so that the loop will read the argname
if self.peek() == &T![,] {
self.next();
}
}
validate_next!(self, T![')'], ")");
let fun_stackpos = self.get_fun_stackpos(sid)?;
Expression::FunCall(sid, fun_stackpos, args)
}
T![ident(name)] => {
let sid = self.string_store.intern_or_lookup(&name);
let stackpos = self.get_stackpos(sid)?;
Expression::Var(sid, stackpos)
}
// Parentheses grouping // Parentheses grouping
Token::LParen => { T!['('] => {
let inner_expr = self.parse_expr(); let inner_expr = self.parse_expr()?;
// Verify that there is a closing parenthesis // Verify that there is a closing parenthesis
if !matches!(self.next(), Token::RParen) { validate_next!(self, T![')'], ")");
panic!("Error parsing primary expr: Exepected closing parenthesis ')'");
}
inner_expr inner_expr
} }
// Unary negation // Unary operations or invalid token
Token::Sub => { tok => match tok.try_to_unop() {
let operand = self.parse_primary(); Some(uot) => Expression::UnOp(uot, self.parse_primary()?.into()),
Expression::UnOp(UnOpType::Negate, operand.into()) None => return Err(ParseErr::UnexpectedToken(tok, "primary".to_string())),
},
};
Ok(primary)
} }
// Unary bitwise not (bitflip) fn get_stackpos(&self, varid: Sid) -> ResPE<usize> {
Token::Tilde => { self.var_stack
let operand = self.parse_primary(); .iter()
Expression::UnOp(UnOpType::BNot, operand.into()) .rev()
.position(|it| *it == varid)
.map(|it| it)
.ok_or(ParseErr::UseOfUndeclaredVar(
self.string_store
.lookup(varid)
.map(String::from)
.unwrap_or("<unknown>".to_string()),
))
} }
// Unary logical not fn get_fun_stackpos(&self, varid: Sid) -> ResPE<usize> {
Token::LNot => { self.fun_stack
let operand = self.parse_primary(); .iter()
Expression::UnOp(UnOpType::LNot, operand.into()) .rev()
} .position(|it| *it == varid)
.map(|it| self.fun_stack.len() - it - 1)
tok => panic!("Error parsing primary expr: Unexpected Token '{:?}'", tok), .ok_or(ParseErr::UseOfUndeclaredFun(
} self.string_store
} .lookup(varid)
.map(String::from)
fn parse_funcall(&mut self, name: String) -> Expression { .unwrap_or("<unknown>".to_string()),
let mut args = Vec::new(); ))
// Consume (
self.next();
while self.peek() != &Token::RParen {
args.push(self.parse_expr());
if self.peek() == &Token::Comma {
self.next();
}
}
self.next();
Expression::FunCall(name, args)
} }
/// Get the next Token without removing it /// Get the next Token without removing it
fn peek(&mut self) -> &Token { fn peek(&mut self) -> &Token {
self.tokens.peek().unwrap_or(&Token::EoF) self.tokens.peek().unwrap_or(&T![EoF])
}
fn putback(&mut self, tok: Token) {
self.tokens.putback(tok);
} }
/// Advance to next Token and return the removed Token /// Advance to next Token and return the removed Token
fn next(&mut self) -> Token { fn next(&mut self) -> Token {
self.tokens.next().unwrap_or(Token::EoF) self.tokens.next().unwrap_or(T![EoF])
}
}
pub fn parse<T: Iterator<Item = Token>, A: IntoIterator<IntoIter = T>>(tokens: A) -> Ast {
let mut parser = Parser::new(tokens);
parser.parse()
}
impl BinOpType {
/// Get the precedence for a binary operator. Higher value means the OP is stronger binding.
/// For example Multiplication is stronger than addition, so Mul has higher precedence than Add.
///
/// The operator precedences are derived from the C language operator precedences. While not all
/// C operators are included or the exact same, the precedence oder is the same.
/// See: https://en.cppreference.com/w/c/language/operator_precedence
fn precedence(&self) -> u8 {
match self {
BinOpType::Declare => 0,
BinOpType::Assign => 1,
BinOpType::LOr => 2,
BinOpType::LAnd => 3,
BinOpType::BOr => 4,
BinOpType::BXor => 5,
BinOpType::BAnd => 6,
BinOpType::EquEqu | BinOpType::NotEqu => 7,
BinOpType::Less | BinOpType::LessEqu | BinOpType::Greater | BinOpType::GreaterEqu => 8,
BinOpType::Shl | BinOpType::Shr => 9,
BinOpType::Add | BinOpType::Sub => 10,
BinOpType::Mul | BinOpType::Div | BinOpType::Mod => 11,
}
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{parse, BinOpType, Expression};
use crate::{ use crate::{
parser::{Ast, Statement}, ast::{BinOpType, Expression, Statement},
token::Token, parser::parse,
T,
}; };
#[test] #[test]
fn test_parser() { fn test_parser() {
// Expression: 1 + 2 * 3 + 4 // Expression: 1 + 2 * 3 - 4
// With precedence: (1 + (2 * 3)) + 4 // With precedence: (1 + (2 * 3)) - 4
let tokens = [ let tokens = [
Token::I64(1), T![i64(1)],
Token::Add, T![+],
Token::I64(2), T![i64(2)],
Token::Mul, T![*],
Token::I64(3), T![i64(3)],
Token::Sub, T![-],
Token::I64(4), T![i64(4)],
Token::Semicolon, T![;],
]; ];
let expected = Statement::Expr(Expression::BinOp( let expected = Statement::Expr(Expression::BinOp(
@@ -382,11 +515,9 @@ mod tests {
Expression::I64(4).into(), Expression::I64(4).into(),
)); ));
let expected = Ast { let expected = vec![expected];
prog: vec![expected],
};
let actual = parse(tokens); let actual = parse(tokens).unwrap();
assert_eq!(expected, actual); assert_eq!(expected, actual.main);
} }
} }

31
src/stringstore.rs Normal file
View File

@@ -0,0 +1,31 @@
use std::collections::HashMap;
/// Handle for an interned string. Wraps the index of the string inside the [`StringStore`]
/// that handed it out.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Sid(usize);

/// Interning table that maps strings to [`Sid`]s and back.
#[derive(Clone, Default)]
pub struct StringStore {
    // All interned strings; a `Sid` is the index into this vector
    strings: Vec<String>,
    // Reverse mapping from string contents to the already assigned `Sid`
    sids: HashMap<String, Sid>,
}

impl StringStore {
    /// Create an empty store that contains no strings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Get the [`Sid`] for `text`. If the text has been interned before, the existing id is
    /// returned; otherwise the text is stored and a new id is assigned to it.
    pub fn intern_or_lookup(&mut self, text: &str) -> Sid {
        if let Some(&known) = self.sids.get(text) {
            return known;
        }

        // The new id is the position the string is about to occupy in `strings`
        let fresh = Sid(self.strings.len());
        self.strings.push(text.to_string());
        self.sids.insert(text.to_string(), fresh);
        fresh
    }

    /// Get the string that belongs to `sid`, or `None` if this store holds no string at that
    /// index.
    pub fn lookup(&self, sid: Sid) -> Option<&String> {
        let Sid(index) = sid;
        self.strings.get(index)
    }
}

View File

@@ -1,152 +1,371 @@
use crate::ast::BinOpType; use crate::{
ast::{BinOpType, UnOpType},
T,
};
/// Language keywords
///
/// The enum carries no data, so it is cheap to copy and derives `Clone` and `Copy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Keyword {
    /// Loop keyword ("loop")
    Loop,
    /// Print keyword ("print")
    Print,
    /// If keyword ("if")
    If,
    /// Else keyword ("else")
    Else,
    /// Function declaration keyword ("fun")
    Fun,
    /// Return keyword ("return")
    Return,
    /// Break keyword ("break")
    Break,
    /// Continue keyword ("continue")
    Continue,
}
/// Literal values
///
/// Derives `Clone` so that a literal can be duplicated without taking it apart by hand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Literal {
    /// Integer literal (64-bit)
    I64(i64),
    /// String literal
    String(String),
}
/// Combined tokens that consist of a combination of characters
///
/// The enum carries no data, so it is cheap to copy and derives `Clone` and `Copy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Combo {
    /// Equal Equal ("==")
    Equal2,
    /// Exclamation mark Equal ("!=")
    ExclamationMarkEqual,
    /// Ampersand Ampersand ("&&")
    Ampersand2,
    /// Pipe Pipe ("||")
    Pipe2,
    /// LessThan LessThan ("<<")
    LessThan2,
    /// GreaterThan GreaterThan (">>")
    GreaterThan2,
    /// LessThan Equal ("<=")
    LessThanEqual,
    /// GreaterThan Equal (">=")
    GreaterThanEqual,
    /// LessThan Minus ("<-")
    LessThanMinus,
}
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum Token { pub enum Token {
/// Integer literal (64-bit) /// Literal value token
I64(i64), Literal(Literal),
/// String literal /// Keyword token
String(String), Keyword(Keyword),
/// Identifier (name for variables, functions, ...) /// Identifier (name for variables, functions, ...)
Ident(String), Ident(String),
/// Loop keyword (loop) /// Combined tokens consisting of multiple characters
Loop, Combo(Combo),
/// Print keyword (print)
Print,
/// If keyword (if)
If,
/// Else keyword (else)
Else,
Fun,
/// Comma (",")
Comma, Comma,
Return, /// Equal Sign ("=")
Equal,
/// Left Parenthesis ('(') /// Semicolon (";")
LParen,
/// Right Parenthesis (')')
RParen,
/// Left curly braces ({)
LBraces,
/// Right curly braces (})
RBraces,
/// Plus (+)
Add,
/// Minus (-)
Sub,
/// Asterisk (*)
Mul,
/// Slash (/)
Div,
/// Percent (%)
Mod,
/// Equal Equal (==)
EquEqu,
/// Exclamationmark Equal (!=)
NotEqu,
/// Pipe (|)
BOr,
/// Ampersand (&)
BAnd,
/// Circumflex (^)
BXor,
/// Logical AND (&&)
LAnd,
/// Logical OR (||)
LOr,
/// Shift Left (<<)
Shl,
/// Shift Right (>>)
Shr,
/// Tilde (~)
Tilde,
/// Logical not (!)
LNot,
/// Left angle bracket (<)
LAngle,
/// Right angle bracket (>)
RAngle,
/// Left angle bracket Equal (<=)
LAngleEqu,
/// Left angle bracket Equal (>=)
RAngleEqu,
/// Left arrow (<-)
LArrow,
/// Equal Sign (=)
Equ,
/// Semicolon (;)
Semicolon, Semicolon,
/// End of file /// End of file
EoF, EoF,
/// Left Bracket ("[")
LBracket,
/// Right Bracket ("]")
RBracket,
/// Left Parenthesis ("(")
LParen,
/// Right Parenthesis (")"")
RParen,
/// Left curly braces ("{")
LBraces,
/// Right curly braces ("}")
RBraces,
/// Plus ("+")
Plus,
/// Minus ("-")
Minus,
/// Asterisk ("*")
Asterisk,
/// Slash ("/")
Slash,
/// Percent ("%")
Percent,
/// Pipe ("|")
Pipe,
/// Tilde ("~")
Tilde,
/// Logical not ("!")
Exclamationmark,
/// Left angle bracket ("<")
LessThan,
/// Right angle bracket (">")
GreaterThan,
/// Ampersand ("&")
Ampersand,
/// Circumflex ("^")
Circumflex,
} }
impl Token { impl Token {
/// If the Token can be used as a binary operation type, get the matching BinOpType. Otherwise
/// return None.
pub fn try_to_binop(&self) -> Option<BinOpType> { pub fn try_to_binop(&self) -> Option<BinOpType> {
Some(match self { Some(match self {
Token::Add => BinOpType::Add, T![+] => BinOpType::Add,
Token::Sub => BinOpType::Sub, T![-] => BinOpType::Sub,
Token::Mul => BinOpType::Mul, T![*] => BinOpType::Mul,
Token::Div => BinOpType::Div, T![/] => BinOpType::Div,
Token::Mod => BinOpType::Mod, T![%] => BinOpType::Mod,
Token::BAnd => BinOpType::BAnd, T![&] => BinOpType::BAnd,
Token::BOr => BinOpType::BOr, T![|] => BinOpType::BOr,
Token::BXor => BinOpType::BXor, T![^] => BinOpType::BXor,
Token::LAnd => BinOpType::LAnd, T![&&] => BinOpType::LAnd,
Token::LOr => BinOpType::LOr, T![||] => BinOpType::LOr,
Token::Shl => BinOpType::Shl, T![<<] => BinOpType::Shl,
Token::Shr => BinOpType::Shr, T![>>] => BinOpType::Shr,
Token::EquEqu => BinOpType::EquEqu, T![==] => BinOpType::EquEqu,
Token::NotEqu => BinOpType::NotEqu, T![!=] => BinOpType::NotEqu,
Token::LAngle => BinOpType::Less, T![<] => BinOpType::Less,
Token::LAngleEqu => BinOpType::LessEqu, T![<=] => BinOpType::LessEqu,
Token::RAngle => BinOpType::Greater, T![>] => BinOpType::Greater,
Token::RAngleEqu => BinOpType::GreaterEqu, T![>=] => BinOpType::GreaterEqu,
Token::LArrow => BinOpType::Declare, T![=] => BinOpType::Assign,
Token::Equ => BinOpType::Assign,
_ => return None,
})
}
pub fn try_to_unop(&self) -> Option<UnOpType> {
Some(match self {
T![-] => UnOpType::Negate,
T![!] => UnOpType::LNot,
T![~] => UnOpType::BNot,
_ => return None, _ => return None,
}) })
} }
} }
/// Macro to quickly create a token of the specified kind.
///
/// Every arm expands to a path (or tuple-variant constructor) of `token::Token`, so the result
/// can be used both as an expression (`T![;]`) and as a pattern (`T![ident(name)]`).
///
/// The expansions use `$crate::` so that the paths always resolve to the crate that defines
/// this macro, no matter from where the exported macro is invoked.
///
/// Brackets, parentheses and braces cannot be passed as single tokens and are therefore
/// written as character literals, e.g. `T!['{']`.
#[macro_export]
macro_rules! T {
    // Keywords
    [loop] => {
        $crate::token::Token::Keyword($crate::token::Keyword::Loop)
    };
    [print] => {
        $crate::token::Token::Keyword($crate::token::Keyword::Print)
    };
    [if] => {
        $crate::token::Token::Keyword($crate::token::Keyword::If)
    };
    [else] => {
        $crate::token::Token::Keyword($crate::token::Keyword::Else)
    };
    [fun] => {
        $crate::token::Token::Keyword($crate::token::Keyword::Fun)
    };
    [return] => {
        $crate::token::Token::Keyword($crate::token::Keyword::Return)
    };
    [break] => {
        $crate::token::Token::Keyword($crate::token::Keyword::Break)
    };
    [continue] => {
        $crate::token::Token::Keyword($crate::token::Keyword::Continue)
    };

    // Literals
    [i64($($val:tt)*)] => {
        $crate::token::Token::Literal($crate::token::Literal::I64($($val)*))
    };
    [str($($val:tt)*)] => {
        $crate::token::Token::Literal($crate::token::Literal::String($($val)*))
    };

    // Ident
    [ident($($val:tt)*)] => {
        $crate::token::Token::Ident($($val)*)
    };

    // Combo tokens
    [==] => {
        $crate::token::Token::Combo($crate::token::Combo::Equal2)
    };
    [!=] => {
        $crate::token::Token::Combo($crate::token::Combo::ExclamationMarkEqual)
    };
    [&&] => {
        $crate::token::Token::Combo($crate::token::Combo::Ampersand2)
    };
    [||] => {
        $crate::token::Token::Combo($crate::token::Combo::Pipe2)
    };
    [<<] => {
        $crate::token::Token::Combo($crate::token::Combo::LessThan2)
    };
    [>>] => {
        $crate::token::Token::Combo($crate::token::Combo::GreaterThan2)
    };
    [<=] => {
        $crate::token::Token::Combo($crate::token::Combo::LessThanEqual)
    };
    [>=] => {
        $crate::token::Token::Combo($crate::token::Combo::GreaterThanEqual)
    };
    [<-] => {
        $crate::token::Token::Combo($crate::token::Combo::LessThanMinus)
    };

    // Normal Tokens
    [,] => {
        $crate::token::Token::Comma
    };
    [=] => {
        $crate::token::Token::Equal
    };
    [;] => {
        $crate::token::Token::Semicolon
    };
    [EoF] => {
        $crate::token::Token::EoF
    };
    ['['] => {
        $crate::token::Token::LBracket
    };
    [']'] => {
        $crate::token::Token::RBracket
    };
    ['('] => {
        $crate::token::Token::LParen
    };
    [')'] => {
        $crate::token::Token::RParen
    };
    ['{'] => {
        $crate::token::Token::LBraces
    };
    ['}'] => {
        $crate::token::Token::RBraces
    };
    [+] => {
        $crate::token::Token::Plus
    };
    [-] => {
        $crate::token::Token::Minus
    };
    [*] => {
        $crate::token::Token::Asterisk
    };
    [/] => {
        $crate::token::Token::Slash
    };
    [%] => {
        $crate::token::Token::Percent
    };
    [|] => {
        $crate::token::Token::Pipe
    };
    [~] => {
        $crate::token::Token::Tilde
    };
    [!] => {
        $crate::token::Token::Exclamationmark
    };
    [<] => {
        $crate::token::Token::LessThan
    };
    [>] => {
        $crate::token::Token::GreaterThan
    };
    [&] => {
        $crate::token::Token::Ampersand
    };
    [^] => {
        $crate::token::Token::Circumflex
    };
}

167
src/util.rs Normal file
View File

@@ -0,0 +1,167 @@
/// Abort the program with a formatted error message.
///
/// In release builds the message is printed to stderr and the process exits with code 1,
/// without the extra location information that a regular panic would print. That information
/// is useful while debugging, but not something an end user of a release executable should see.
///
/// In debug builds and while running tests the macro falls back to a regular `panic!`, so that
/// tests keep working as expected.
///
/// The macro takes the same arguments as `panic!`: a format string, optionally followed by the
/// values to format.
#[macro_export]
macro_rules! nice_panic {
    ($fmt:expr $(, $($arg:tt)*)?) => {{
        if cfg!(any(test, debug_assertions)) {
            panic!($fmt $(, $($arg)*)?);
        } else {
            eprintln!($fmt $(, $($arg)*)?);
            std::process::exit(1);
        }
    }};
}
/// The PutBackIter allows for items to be put back and to be peeked. Putting an item back
/// will cause it to be the next item returned by `next`. Peeking an item will get a reference to
/// the next item in the iterator without removing it.
///
/// The whole PutBackIter behaves analogous to `std::iter::Peekable` with the addition of the
/// `putback` function. This is slightly slower than `Peekable`, but allows for an unlimited number
/// of putbacks and therefore an unlimited look-ahead range.
pub struct PutBackIter<T: Iterator> {
    // The wrapped iterator, only consulted once `putback_stack` is empty
    iter: T,
    // Items that were put back; the last element is the next one to be returned
    putback_stack: Vec<T::Item>,
}

impl<T> PutBackIter<T>
where
    T: Iterator,
{
    /// Make the given iterator putbackable, wrapping it in the PutBackIter type. This effectively
    /// adds the `peek` and `putback` functions.
    pub fn new(iter: T) -> Self {
        Self {
            iter,
            putback_stack: Vec::new(),
        }
    }

    /// Put the given item back into the iterator. This causes the putbacked items to be returned by
    /// next in last-in-first-out order (aka. stack order). Only after all previously putback items
    /// have been returned, the actual underlying iterator is used to get items.
    /// The number of items that can be put back is unlimited.
    pub fn putback(&mut self, it: T::Item) {
        self.putback_stack.push(it);
    }

    /// Peek the next item, getting a reference to it without removing it from the iterator. This
    /// also includes items that were previously put back and not yet removed.
    ///
    /// Returns `None` if there is neither a put back item nor another item in the underlying
    /// iterator.
    pub fn peek(&mut self) -> Option<&T::Item> {
        if self.putback_stack.is_empty() {
            // Nothing buffered: pull one item from the underlying iterator and park it on the
            // stack, so that the following `next` call returns exactly this item
            let it = self.iter.next()?;
            self.putback_stack.push(it);
        }
        self.putback_stack.last()
    }
}

impl<T> Iterator for PutBackIter<T>
where
    T: Iterator,
{
    type Item = T::Item;

    /// Return the most recently put back item if there is one, otherwise the next item of the
    /// underlying iterator.
    fn next(&mut self) -> Option<Self::Item> {
        self.putback_stack.pop().or_else(|| self.iter.next())
    }

    /// The bounds of the underlying iterator, extended by the number of items that are
    /// currently put back. Forwarding this lets consumers like `collect` reserve space up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let pending = self.putback_stack.len();
        let (lower, upper) = self.iter.size_hint();
        (
            lower.saturating_add(pending),
            // An overflowing upper bound can't be represented, so it becomes "unknown"
            upper.and_then(|bound| bound.checked_add(pending)),
        )
    }
}

/// Extension trait that adds the `putbackable` adapter to every iterator.
pub trait PutBackableExt {
    /// Make the iterator putbackable, wrapping it in the PutBackIter type. This effectively
    /// adds the `peek` and `putback` functions.
    fn putbackable(self) -> PutBackIter<Self>
    where
        Self: Iterator + Sized,
    {
        PutBackIter::new(self)
    }
}

impl<T: Iterator> PutBackableExt for T {}
#[cfg(test)]
mod tests {
    use super::PutBackableExt;

    /// Without any peeks or putbacks, the wrapper must yield exactly the wrapped items.
    #[test]
    fn putback_iter_next() {
        let mut iter = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_iter();
        let mut pb_iter = iter.clone().putbackable();

        // Check if next works
        for _ in 0..iter.len() {
            assert_eq!(pb_iter.next(), iter.next());
        }
    }

    /// Peeking must preview the next item without consuming it.
    #[test]
    fn putback_iter_peek() {
        let mut iter_orig = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_iter();
        let mut iter = iter_orig.clone();
        let mut pb_iter = iter.clone().putbackable();

        for _ in 0..iter.len() {
            // Check if peek gives a preview of the actual next element
            assert_eq!(pb_iter.peek(), iter.next().as_ref());
            // Check if next still returns the next (just peeked) element and not the one after
            assert_eq!(pb_iter.next(), iter_orig.next());
        }
    }

    /// Items that are put back must be returned again in stack order before the remaining
    /// items of the wrapped iterator.
    #[test]
    fn putback_iter_putback() {
        let iter_orig = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_iter();
        let mut iter = iter_orig.clone();
        let mut pb_iter = iter.clone().putbackable();

        // Get the first 5 items with next and check if they match
        let it0 = pb_iter.next();
        assert_eq!(it0, iter.next());
        let it1 = pb_iter.next();
        assert_eq!(it1, iter.next());
        let it2 = pb_iter.next();
        assert_eq!(it2, iter.next());
        let it3 = pb_iter.next();
        assert_eq!(it3, iter.next());
        let it4 = pb_iter.next();
        assert_eq!(it4, iter.next());

        // Put one value back and check if `next` works as expected, returning the just put back
        // item
        pb_iter.putback(it0.unwrap());
        assert_eq!(pb_iter.next(), it0);

        // Put all values back
        pb_iter.putback(it4.unwrap());
        pb_iter.putback(it3.unwrap());
        pb_iter.putback(it2.unwrap());
        pb_iter.putback(it1.unwrap());
        pb_iter.putback(it0.unwrap());

        // After all values have been put back, the iter should match the original again. Compare
        // against the complete original sequence, not just the items that were put back
        for expected in iter_orig {
            assert_eq!(pb_iter.next(), Some(expected));
        }
        // Nothing may be left over afterwards
        assert_eq!(pb_iter.next(), None);
    }
}