From 1ef3119fe613823a2145126c58948361ca7d3cd8 Mon Sep 17 00:00:00 2001 From: Peter Mikkelsen Date: Sat, 8 Jan 2022 22:45:00 +0000 Subject: Add initial code, just to get started --- lexer.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 lexer.c (limited to 'lexer.c') diff --git a/lexer.c b/lexer.c new file mode 100644 index 0000000..68e925b --- /dev/null +++ b/lexer.c @@ -0,0 +1,65 @@ +#include +#include +#include + +#include "apl9.h" + +Rune primfuncnames[] = L"+-×÷*⍟⌹○!?|⌈⌊⊥⊤⊣⊢=≠≤<>≥≡≢∨∧⍲⍱↑↓⊂⊃⊆⌷⍋⍒⍳⍸∊⍷∪∩~,⍪⍴⌽⊖⍉⍎⍕"; +Rune primmonopnames[] = L"¨⍨⌸⌶&"; +Rune primdyadopnames[] = L"⍣.∘⍤⍥@⍠⌺"; +Rune primhybridnames[] = L"/\⌿⍀"; + +Datum * +lexline(Rune *line, int *ntoks) +{ + int offset = 0; + int len = runestrlen(line); + Datum *tokens = mallocz(sizeof(Datum) * MAX_LINE_TOKENS, 1); + *ntoks = 0; + while(offset < len){ + if(isspacerune(line[offset])){ + offset++; + continue; + }else if(runestrchr(L"(){}[]", line[offset])){ + switch(line[offset]){ + case '(': tokens[*ntoks].tag = LParTag; break; + case ')': tokens[*ntoks].tag = RParTag; break; + case '{': tokens[*ntoks].tag = LCurlTag; break; + case '}': tokens[*ntoks].tag = RCurlTag; break; + case '[': tokens[*ntoks].tag = LBracketTag; break; + case ']': tokens[*ntoks].tag = RBracketTag; break; + } + tokens[*ntoks].strrep = runesmprint("%C", line[offset]); + offset++; + }else if(runestrchr(primfuncnames, line[offset])){ + tokens[*ntoks].tag = FunctionTag; + tokens[*ntoks].strrep = runesmprint("%C", line[offset]); + offset++; + }else if(runestrchr(primmonopnames, line[offset])){ + tokens[*ntoks].tag = MonadicOpTag; + tokens[*ntoks].strrep = runesmprint("%C", line[offset]); + offset++; + }else if(runestrchr(primdyadopnames, line[offset])){ + tokens[*ntoks].tag = DyadicOpTag; + tokens[*ntoks].strrep = runesmprint("%C", line[offset]); + offset++; + }else if(isdigitrune(line[offset])){ + char buf[64]; + char *p = buf; + while(isdigitrune(line[offset])){ + p += runetochar(p, &line[offset]); + offset++; + } + *p = 0; + tokens[*ntoks].tag = ArrayTag; + tokens[*ntoks].array = mkscalarint(atoll(buf)); + tokens[*ntoks].strrep = runesmprint("%s", buf); + }else{ + print("Can't lex: %S\n", &line[offset]); + *ntoks = 0; + break; + } + (*ntoks)++; + } + return tokens; +} \ No newline at end of file -- cgit v1.2.3