summary | refs | log | tree | commit | diff
path: root/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'lexer.c')
-rw-r--r-- lexer.c 65
1 files changed, 65 insertions, 0 deletions
diff --git a/lexer.c b/lexer.c
new file mode 100644
index 0000000..68e925b
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,65 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+#include "apl9.h"
+
+/* Runes that lexline tags as primitive functions (FunctionTag). */
+Rune primfuncnames[] = L"+-×÷*⍟⌹○!?|⌈⌊⊥⊤⊣⊢=≠≤<>≥≡≢∨∧⍲⍱↑↓⊂⊃⊆⌷⍋⍒⍳⍸∊⍷∪∩~,⍪⍴⌽⊖⍉⍎⍕";
+/* Runes that lexline tags as primitive monadic operators (MonadicOpTag). */
+Rune primmonopnames[] = L"¨⍨⌸⌶&";
+/* Runes that lexline tags as primitive dyadic operators (DyadicOpTag). */
+Rune primdyadopnames[] = L"⍣.∘⍤⍥@⍠⌺";
+/*
+ * Hybrid primitives: / \ ⌿ ⍀.
+ * The backslash has to be written as \\ so that it is kept as a
+ * member of the string instead of starting an escape sequence.
+ */
+Rune primhybridnames[] = L"/\\⌿⍀";
+
+/*
+ * Split one line of source text into an array of tokens.
+ *
+ * line:  NUL-terminated rune string to tokenise.
+ * ntoks: out parameter; receives the number of tokens produced, or 0
+ *        when the line could not be lexed (a message is printed).
+ *
+ * Returns an array of MAX_LINE_TOKENS zero-initialised Datums obtained
+ * from mallocz, of which the first *ntoks are filled in, or nil if the
+ * allocation failed.
+ *
+ * Recognised input: white space (skipped), the brackets ( ) { } [ ],
+ * the runes in primfuncnames, primmonopnames and primdyadopnames, and
+ * runs of digits, which become integer scalars. Anything else,
+ * including the runes in primhybridnames, is reported as an error.
+ *
+ * NOTE(review): on the error paths the strrep strings and arrays that
+ * were already created for earlier tokens are not released here.
+ */
+Datum *
+lexline(Rune *line, int *ntoks)
+{
+	int offset = 0;
+	int len = runestrlen(line);
+	Datum *tokens = mallocz(sizeof(Datum) * MAX_LINE_TOKENS, 1);
+	Datum *tok;
+
+	*ntoks = 0;
+	if(tokens == nil)
+		return nil;
+	while(offset < len){
+		if(isspacerune(line[offset])){
+			offset++;
+			continue;
+		}
+		/* Never write past the end of the token array. */
+		if(*ntoks >= MAX_LINE_TOKENS){
+			print("Can't lex: too many tokens on one line\n");
+			*ntoks = 0;
+			break;
+		}
+		tok = &tokens[*ntoks];
+		if(runestrchr(L"(){}[]", line[offset])){
+			switch(line[offset]){
+			case '(': tok->tag = LParTag; break;
+			case ')': tok->tag = RParTag; break;
+			case '{': tok->tag = LCurlTag; break;
+			case '}': tok->tag = RCurlTag; break;
+			case '[': tok->tag = LBracketTag; break;
+			case ']': tok->tag = RBracketTag; break;
+			}
+			tok->strrep = runesmprint("%C", line[offset]);
+			offset++;
+		}else if(runestrchr(primfuncnames, line[offset])){
+			tok->tag = FunctionTag;
+			tok->strrep = runesmprint("%C", line[offset]);
+			offset++;
+		}else if(runestrchr(primmonopnames, line[offset])){
+			tok->tag = MonadicOpTag;
+			tok->strrep = runesmprint("%C", line[offset]);
+			offset++;
+		}else if(runestrchr(primdyadopnames, line[offset])){
+			tok->tag = DyadicOpTag;
+			tok->strrep = runesmprint("%C", line[offset]);
+			offset++;
+		}else if(isdigitrune(line[offset])){
+			char buf[64];
+			char *p = buf;
+			int start = offset;
+
+			/* The terminating NUL of line is not a digit, so this stops at the end. */
+			while(isdigitrune(line[offset])){
+				/* Keep room for this rune's UTF-8 bytes plus the final NUL. */
+				if(p + runelen(line[offset]) >= buf + sizeof(buf)){
+					print("Can't lex: number too long: %S\n", &line[start]);
+					*ntoks = 0;
+					return tokens;
+				}
+				p += runetochar(p, &line[offset]);
+				offset++;
+			}
+			*p = 0;
+			tok->tag = ArrayTag;
+			tok->array = mkscalarint(atoll(buf));
+			tok->strrep = runesmprint("%s", buf);
+		}else{
+			print("Can't lex: %S\n", &line[offset]);
+			*ntoks = 0;
+			break;
+		}
+		(*ntoks)++;
+	}
+	return tokens;
+}
\ No newline at end of file