summary | refs | log | tree | commit | diff
path: root/lexer.c
diff options
context:
space:
mode:
author: Peter Mikkelsen <petermikkelsen10@gmail.com>, 2022-01-24 00:00:05 +0000
committer: Peter Mikkelsen <petermikkelsen10@gmail.com>, 2022-01-24 00:00:05 +0000
commit: 464110afe0599efa5b876eb398769cdbf2a0c1df (patch)
tree: c572fdeb3773c20d12a9f104292b15f20d885143 /lexer.c
parent: 9a938d3ce26b2d3728d791c0f858acdbd50223b5 (diff)
Rework the lexer to lex from either a string or a Biobuf. This allows multi-line dfns.
Diffstat (limited to 'lexer.c')
-rw-r--r-- lexer.c 259
1 files changed, 172 insertions, 87 deletions
diff --git a/lexer.c b/lexer.c
index f56a623..0515727 100644
--- a/lexer.c
+++ b/lexer.c
@@ -4,26 +4,76 @@
#include "apl9.h"
+enum {
+ InputTypeBio,
+ InputTypeString,
+};
+
+typedef struct InputStream InputStream;
+struct InputStream
+{
+ int tag;
+ int offset;
+ Rune last;
+ Biobuf *bio;
+ Rune *string;
+};
+
+int inputEOF(InputStream *);
+Rune getrune(InputStream *);
+void ungetrune(InputStream *);
+
+Statement *lexline(InputStream *, int);
+
+Statement *
+lexlinebio(Biobuf *bio, int toplevel)
+{
+ InputStream in;
+ in.tag = InputTypeBio;
+ in.offset = 0;
+ in.last = 0;
+ in.bio = bio;
+ in.string = nil;
+ return lexline(&in, toplevel);
+}
+
+Statement *
+lexlinestr(Rune *str, int toplevel)
+{
+ InputStream in;
+ in.tag = InputTypeString;
+ in.offset = 0;
+ in.last = 0;
+ in.bio = nil;
+ in.string = str;
+ return lexline(&in, toplevel);
+}
+
Statement *
-lexline(Rune *line, int toplevel)
+lexline(InputStream *input, int toplevel)
{
- int offset = 0;
- int len = runestrlen(line);
Statement *stmt = emalloc(sizeof(Statement));
stmt->ntoks = 0;
stmt->toks = mallocz(sizeof(Datum) * MAX_LINE_TOKENS, 1);
stmt->guard = nil;
stmt->next = nil;
- while(offset < len){
+ Rune peek = getrune(input);
+ while(!inputEOF(input) && (peek != '\n' || toplevel == 0)){
Rune *p;
- if(isspacerune(line[offset])){
- offset++;
+ if(isspacerune(peek) && peek != '\n'){
+ peek = getrune(input);
continue;
- }else if(runestrchr(L"←⋄⍝⍬", line[offset])){
- switch(line[offset]){
+ }else if(runestrchr(L"←⋄\n⍝⍬", peek)){
+ switch(peek){
case L'←': stmt->toks[stmt->ntoks].tag = ArrowTag; break;
- case L'⋄': stmt->next = lexline(&line[offset+1], toplevel); goto end;
+ case L'\n':
+ case L'⋄':
+ if(stmt->ntoks == 0)
+ stmt = lexline(input, toplevel);
+ else
+ stmt->next = lexline(input, toplevel);
+ goto end;
case L'⍝': goto end;
case L'⍬':
stmt->toks[stmt->ntoks].tag = ArrayTag;
@@ -31,127 +81,117 @@ lexline(Rune *line, int toplevel)
stmt->toks[stmt->ntoks].array->shape[0] = 0;
break;
}
- offset++;
- }else if(!toplevel && line[offset] == ':'){
+ }else if(!toplevel && peek == ':'){
Rune buf[MAX_LINE_LENGTH];
Rune *p = buf;
- offset++;
- while(line[offset] != L'⋄' && offset < len){
- *p = line[offset];
- p++;
- offset++;
- }
+ while((peek = getrune(input)) != L'⋄' && peek != '\n' && !inputEOF(input))
+ *p++ = peek;
*p = 0;
- stmt->guard = lexline(buf, toplevel);
+ ungetrune(input);
+ stmt->guard = lexlinestr(buf, toplevel);
stmt->ntoks--;
- }else if(line[offset] == '{'){
+ }else if(peek == '{'){
int unclosed = 1;
Rune buf[MAX_LINE_LENGTH];
Rune *p = buf;
- offset++;
- while((line[offset] != '}' || unclosed > 1) && offset < len){
- if(line[offset] == '{')
+ while(((peek = getrune(input)) != '}' || unclosed > 1) && !inputEOF(input)){
+ if(peek == '{')
unclosed++;
- else if(line[offset] == '}')
+ else if(peek == '}')
unclosed--;
- *p = line[offset];
- p++;
- offset++;
+ *p++ = peek;
}
- if(line[offset] != '}')
+ if(peek != '}')
goto syntax_error;
*p = 0;
- offset++;
stmt->toks[stmt->ntoks].tag = FunctionTag;
stmt->toks[stmt->ntoks].func.type = FunctypeDfn;
stmt->toks[stmt->ntoks].func.dfn = runestrdup(buf);
- }else if(line[offset] == '('){
+ }else if(peek == '('){
int unclosed = 1;
Rune buf[MAX_LINE_LENGTH];
Rune *p = buf;
- offset++;
- while((line[offset] != ')' || unclosed > 1) && offset < len){
- if(line[offset] == '(')
+ while(((peek = getrune(input)) != ')' || unclosed > 1) && !inputEOF(input) && peek != '\n'){
+ if(peek == '(')
unclosed++;
- else if(line[offset] == ')')
+ else if(peek == ')')
unclosed--;
- *p = line[offset];
- p++;
- offset++;
+ *p++ = peek;
}
- if(line[offset] != ')')
+ if(peek != ')')
goto syntax_error;
*p = 0;
- offset++;
stmt->toks[stmt->ntoks].tag = LParTag;
- stmt->toks[stmt->ntoks].stmt = *lexline(buf, toplevel);
+ stmt->toks[stmt->ntoks].stmt = *lexlinestr(buf, toplevel);
stmt->ntoks++;
stmt->toks[stmt->ntoks].tag = RParTag;
- }else if(p = runestrchr(primfuncnames, line[offset])){
+ }else if(p = runestrchr(primfuncnames, peek)){
stmt->toks[stmt->ntoks].tag = FunctionTag;
stmt->toks[stmt->ntoks].func.type = FunctypePrim;
stmt->toks[stmt->ntoks].func.code = p-primfuncnames;
- offset++;
- }else if(p = runestrchr(primmonopnames, line[offset])){
+ }else if(p = runestrchr(primmonopnames, peek)){
stmt->toks[stmt->ntoks].tag = MonadicOpTag;
stmt->toks[stmt->ntoks].operator.type = OperatortypePrim;
stmt->toks[stmt->ntoks].operator.dyadic = 0;
stmt->toks[stmt->ntoks].operator.code = p-primmonopnames;
- offset++;
- }else if(p = runestrchr(primdyadopnames, line[offset])){
+ }else if(p = runestrchr(primdyadopnames, peek)){
stmt->toks[stmt->ntoks].tag = DyadicOpTag;
stmt->toks[stmt->ntoks].operator.type = OperatortypePrim;
stmt->toks[stmt->ntoks].operator.dyadic = 1;
stmt->toks[stmt->ntoks].operator.code = p-primdyadopnames;
- offset++;
- }else if(p = runestrchr(primhybridnames, line[offset])){
+ }else if(p = runestrchr(primhybridnames, peek)){
stmt->toks[stmt->ntoks].tag = HybridTag;
stmt->toks[stmt->ntoks].hybrid = p-primhybridnames;
- offset++;
- }else if(isdigitrune(line[offset]) || (line[offset] == L'¯' && isdigitrune(line[offset+1]))){
+ }else if(isdigitrune(peek) || peek == L'¯'){
char buf[64];
char *p = buf;
int floating = 0;
- if(line[offset] == L'¯'){
+ if(peek == L'¯'){
*p++ = '-';
- offset++;
+ peek = getrune(input);
}
get_digits:
- while(isdigitrune(line[offset]))
- p += runetochar(p, &line[offset++]);
- if(!floating && line[offset] == '.'){
- p += runetochar(p, &line[offset++]);
+ while(isdigitrune(peek)){
+ p += runetochar(p, &peek);
+ peek = getrune(input);
+ }
+ if(!floating && peek == '.'){
+ p += runetochar(p, &peek);
+ peek = getrune(input);
floating = 1;
goto get_digits;
}
*p = 0;
+ ungetrune(input);
stmt->toks[stmt->ntoks].tag = ArrayTag;
stmt->toks[stmt->ntoks].array = floating ? mkscalarfloat(atof(buf)) : mkscalarint(atoll(buf));
- }else if(runestrchr(L"⍺⍵", line[offset])){
- Rune *name = L"?";
- name[0] = line[offset];
+ }else if(runestrchr(L"⍺⍵", peek)){
+ Rune name[2] = {peek, 0};
stmt->toks[stmt->ntoks].tag = NameTag;
stmt->toks[stmt->ntoks].symbol = getsym(name);
- offset++;
- }else if(isalpharune(line[offset])){
+ }else if(isalpharune(peek)){
Rune buf[64];
Rune *p = buf;
- while(isalpharune(line[offset]) || isdigitrune(line[offset])){
- *p = line[offset];
- p++;
- offset++;
+ while(isalpharune(peek) || isdigitrune(peek)){
+ *p++ = peek;
+ peek = getrune(input);
}
*p = 0;
+ ungetrune(input);
stmt->toks[stmt->ntoks].tag = NameTag;
stmt->toks[stmt->ntoks].symbol = getsym(buf);
- }else if(runestrchr(L"⎕⍞", line[offset])){
+ }else if(runestrchr(L"⎕⍞", peek)){
/* quad names */
Rune buf[64];
Rune *p = buf;
- *p++ = line[offset++];
- while(isalpharune(line[offset]))
- *p++ = toupperrune(line[offset++]);
+ *p++ = peek;
+ peek = getrune(input);
+ while(isalpharune(peek)){
+ *p++ = toupperrune(peek);
+ peek = getrune(input);
+ }
*p = 0;
+ ungetrune(input);
int valid = 0;
for(int i = 0; quadnames[i].name != nil && !valid; i++){
if(runestrcmp(buf, quadnames[i].name) != 0)
@@ -159,30 +199,31 @@ get_digits:
valid = 1;
stmt->toks[stmt->ntoks] = quadnamedatum(quadnames[i]);
}
- if(!valid){
- offset -= runestrlen(buf);
+ if(!valid)
goto syntax_error;
- }
- }else if(line[offset] == '\''){
+ }else if(peek == '\''){
Rune buf[1024]; /* stupid limit on literal string lengths */
Rune *b = buf;
int done = 0;
- offset++;
- while(!done && offset < len){
- if(line[offset] == '\'' && line[offset+1] != '\''){
- *b = 0;
- done = 1;
- }else if(line[offset] == '\'' && line[offset+1] == '\''){
- *b++ = '\'';
- offset++;
- }else
- *b++ = line[offset];
- offset++;
+ peek = getrune(input);
+ while(!done && !inputEOF(input)){
+ if(peek == '\''){
+ peek = getrune(input);
+ if(peek != '\''){
+ *b = 0;
+ done = 1;
+ ungetrune(input);
+ }else{
+ *b++ = '\'';
+ peek = getrune(input);
+ }
+ }else{
+ *b++ = peek;
+ peek = getrune(input);
+ }
}
- if(!done){
- offset = offset - (b-buf);
+ if(!done)
goto syntax_error;
- }
stmt->toks[stmt->ntoks].tag = ArrayTag;
if(runestrlen(buf) == 1)
stmt->toks[stmt->ntoks].array = mkscalarrune(buf[0]);
@@ -191,14 +232,58 @@ get_digits:
}else{
Rune *err;
syntax_error:
- err = runesmprint("Can't lex: %S", &line[offset]);
+ err = runesmprint("Can't lex");
free(stmt->toks);
free(stmt);
throwerror(err, ESyntax);
}
+ /*print("Got token: %S\n", ppdatum(stmt->toks[stmt->ntoks]));*/
stmt->ntoks++;
+ peek = getrune(input);
}
end:
stmt->toks = realloc(stmt->toks, sizeof(Datum) * stmt->ntoks);
return stmt;
-}
\ No newline at end of file
+}
+
+int
+inputEOF(InputStream *i)
+{
+ int eof;
+ if(i->tag == InputTypeBio)
+ eof = i->last == Beof;
+ else
+ eof = i->last == 0;
+ /*if(eof) print("EOF\n");*/
+ return eof;
+}
+
+Rune
+getrune(InputStream *i)
+{
+ Rune r;
+ if(i->tag == InputTypeBio)
+ r = Bgetrune(i->bio);
+ else{
+ if(i->string[i->offset] == 0)
+ r = 0;
+ else
+ r = i->string[i->offset++];
+ }
+ /*print("Get rune: '%C' (%d)\n", r, r);*/
+ i->last = r;
+ return r;
+}
+
+void
+ungetrune(InputStream *i)
+{
+ /*print("Unget rune: '%C' (%d)\n", i->last, i->last);*/
+ if(inputEOF(i))
+ return;
+
+ if(i->tag == InputTypeBio)
+ Bungetrune(i->bio);
+ else
+ i->offset--;
+}