diff options
Diffstat (limited to 'lexer.c')
-rw-r--r-- | lexer.c | 259 |
1 files changed, 172 insertions, 87 deletions
@@ -4,26 +4,76 @@ #include "apl9.h" +enum { + InputTypeBio, + InputTypeString, +}; + +typedef struct InputStream InputStream; +struct InputStream +{ + int tag; + int offset; + Rune last; + Biobuf *bio; + Rune *string; +}; + +int inputEOF(InputStream *); +Rune getrune(InputStream *); +void ungetrune(InputStream *); + +Statement *lexline(InputStream *, int); + +Statement * +lexlinebio(Biobuf *bio, int toplevel) +{ + InputStream in; + in.tag = InputTypeBio; + in.offset = 0; + in.last = 0; + in.bio = bio; + in.string = nil; + return lexline(&in, toplevel); +} + +Statement * +lexlinestr(Rune *str, int toplevel) +{ + InputStream in; + in.tag = InputTypeString; + in.offset = 0; + in.last = 0; + in.bio = nil; + in.string = str; + return lexline(&in, toplevel); +} + Statement * -lexline(Rune *line, int toplevel) +lexline(InputStream *input, int toplevel) { - int offset = 0; - int len = runestrlen(line); Statement *stmt = emalloc(sizeof(Statement)); stmt->ntoks = 0; stmt->toks = mallocz(sizeof(Datum) * MAX_LINE_TOKENS, 1); stmt->guard = nil; stmt->next = nil; - while(offset < len){ + Rune peek = getrune(input); + while(!inputEOF(input) && (peek != '\n' || toplevel == 0)){ Rune *p; - if(isspacerune(line[offset])){ - offset++; + if(isspacerune(peek) && peek != '\n'){ + peek = getrune(input); continue; - }else if(runestrchr(L"←⋄⍝⍬", line[offset])){ - switch(line[offset]){ + }else if(runestrchr(L"←⋄\n⍝⍬", peek)){ + switch(peek){ case L'←': stmt->toks[stmt->ntoks].tag = ArrowTag; break; - case L'⋄': stmt->next = lexline(&line[offset+1], toplevel); goto end; + case L'\n': + case L'⋄': + if(stmt->ntoks == 0) + stmt = lexline(input, toplevel); + else + stmt->next = lexline(input, toplevel); + goto end; case L'⍝': goto end; case L'⍬': stmt->toks[stmt->ntoks].tag = ArrayTag; @@ -31,127 +81,117 @@ lexline(Rune *line, int toplevel) stmt->toks[stmt->ntoks].array->shape[0] = 0; break; } - offset++; - }else if(!toplevel && line[offset] == ':'){ + }else if(!toplevel && peek == ':'){ Rune buf[MAX_LINE_LENGTH]; Rune *p = buf; - offset++; - while(line[offset] != L'⋄' && offset < len){ - *p = line[offset]; - p++; - offset++; - } + while((peek = getrune(input)) != L'⋄' && peek != '\n' && !inputEOF(input)) + *p++ = peek; *p = 0; - stmt->guard = lexline(buf, toplevel); + ungetrune(input); + stmt->guard = lexlinestr(buf, toplevel); stmt->ntoks--; - }else if(line[offset] == '{'){ + }else if(peek == '{'){ int unclosed = 1; Rune buf[MAX_LINE_LENGTH]; Rune *p = buf; - offset++; - while((line[offset] != '}' || unclosed > 1) && offset < len){ - if(line[offset] == '{') + while(((peek = getrune(input)) != '}' || unclosed > 1) && !inputEOF(input)){ + if(peek == '{') unclosed++; - else if(line[offset] == '}') + else if(peek == '}') unclosed--; - *p = line[offset]; - p++; - offset++; + *p++ = peek; } - if(line[offset] != '}') + if(peek != '}') goto syntax_error; *p = 0; - offset++; stmt->toks[stmt->ntoks].tag = FunctionTag; stmt->toks[stmt->ntoks].func.type = FunctypeDfn; stmt->toks[stmt->ntoks].func.dfn = runestrdup(buf); - }else if(line[offset] == '('){ + }else if(peek == '('){ int unclosed = 1; Rune buf[MAX_LINE_LENGTH]; Rune *p = buf; - offset++; - while((line[offset] != ')' || unclosed > 1) && offset < len){ - if(line[offset] == '(') + while(((peek = getrune(input)) != ')' || unclosed > 1) && !inputEOF(input) && peek != '\n'){ + if(peek == '(') unclosed++; - else if(line[offset] == ')') + else if(peek == ')') unclosed--; - *p = line[offset]; - p++; - offset++; + *p++ = peek; } - if(line[offset] != ')') + if(peek != ')') goto syntax_error; *p = 0; - offset++; stmt->toks[stmt->ntoks].tag = LParTag; - stmt->toks[stmt->ntoks].stmt = *lexline(buf, toplevel); + stmt->toks[stmt->ntoks].stmt = *lexlinestr(buf, toplevel); stmt->ntoks++; stmt->toks[stmt->ntoks].tag = RParTag; - }else if(p = runestrchr(primfuncnames, line[offset])){ + }else if(p = runestrchr(primfuncnames, peek)){ stmt->toks[stmt->ntoks].tag = FunctionTag; stmt->toks[stmt->ntoks].func.type = FunctypePrim; stmt->toks[stmt->ntoks].func.code = p-primfuncnames; - offset++; - }else if(p = runestrchr(primmonopnames, line[offset])){ + }else if(p = runestrchr(primmonopnames, peek)){ stmt->toks[stmt->ntoks].tag = MonadicOpTag; stmt->toks[stmt->ntoks].operator.type = OperatortypePrim; stmt->toks[stmt->ntoks].operator.dyadic = 0; stmt->toks[stmt->ntoks].operator.code = p-primmonopnames; - offset++; - }else if(p = runestrchr(primdyadopnames, line[offset])){ + }else if(p = runestrchr(primdyadopnames, peek)){ stmt->toks[stmt->ntoks].tag = DyadicOpTag; stmt->toks[stmt->ntoks].operator.type = OperatortypePrim; stmt->toks[stmt->ntoks].operator.dyadic = 1; stmt->toks[stmt->ntoks].operator.code = p-primdyadopnames; - offset++; - }else if(p = runestrchr(primhybridnames, line[offset])){ + }else if(p = runestrchr(primhybridnames, peek)){ stmt->toks[stmt->ntoks].tag = HybridTag; stmt->toks[stmt->ntoks].hybrid = p-primhybridnames; - offset++; - }else if(isdigitrune(line[offset]) || (line[offset] == L'¯' && isdigitrune(line[offset+1]))){ + }else if(isdigitrune(peek) || peek == L'¯'){ char buf[64]; char *p = buf; int floating = 0; - if(line[offset] == L'¯'){ + if(peek == L'¯'){ *p++ = '-'; - offset++; + peek = getrune(input); } get_digits: - while(isdigitrune(line[offset])) - p += runetochar(p, &line[offset++]); - if(!floating && line[offset] == '.'){ - p += runetochar(p, &line[offset++]); + while(isdigitrune(peek)){ + p += runetochar(p, &peek); + peek = getrune(input); + } + if(!floating && peek == '.'){ + p += runetochar(p, &peek); + peek = getrune(input); floating = 1; goto get_digits; } *p = 0; + ungetrune(input); stmt->toks[stmt->ntoks].tag = ArrayTag; stmt->toks[stmt->ntoks].array = floating ? mkscalarfloat(atof(buf)) : mkscalarint(atoll(buf)); - }else if(runestrchr(L"⍺⍵", line[offset])){ - Rune *name = L"?"; - name[0] = line[offset]; + }else if(runestrchr(L"⍺⍵", peek)){ + Rune name[2] = {peek, 0}; stmt->toks[stmt->ntoks].tag = NameTag; stmt->toks[stmt->ntoks].symbol = getsym(name); - offset++; - }else if(isalpharune(line[offset])){ + }else if(isalpharune(peek)){ Rune buf[64]; Rune *p = buf; - while(isalpharune(line[offset]) || isdigitrune(line[offset])){ - *p = line[offset]; - p++; - offset++; + while(isalpharune(peek) || isdigitrune(peek)){ + *p++ = peek; + peek = getrune(input); } *p = 0; + ungetrune(input); stmt->toks[stmt->ntoks].tag = NameTag; stmt->toks[stmt->ntoks].symbol = getsym(buf); - }else if(runestrchr(L"⎕⍞", line[offset])){ + }else if(runestrchr(L"⎕⍞", peek)){ /* quad names */ Rune buf[64]; Rune *p = buf; - *p++ = line[offset++]; - while(isalpharune(line[offset])) - *p++ = toupperrune(line[offset++]); + *p++ = peek; + peek = getrune(input); + while(isalpharune(peek)){ + *p++ = toupperrune(peek); + peek = getrune(input); + } *p = 0; + ungetrune(input); int valid = 0; for(int i = 0; quadnames[i].name != nil && !valid; i++){ if(runestrcmp(buf, quadnames[i].name) != 0) @@ -159,30 +199,31 @@ get_digits: valid = 1; stmt->toks[stmt->ntoks] = quadnamedatum(quadnames[i]); } - if(!valid){ - offset -= runestrlen(buf); + if(!valid) goto syntax_error; - } - }else if(line[offset] == '\''){ + }else if(peek == '\''){ Rune buf[1024]; /* stupid limit on literal string lengths */ Rune *b = buf; int done = 0; - offset++; - while(!done && offset < len){ - if(line[offset] == '\'' && line[offset+1] != '\''){ - *b = 0; - done = 1; - }else if(line[offset] == '\'' && line[offset+1] == '\''){ - *b++ = '\''; - offset++; - }else - *b++ = line[offset]; - offset++; + peek = getrune(input); + while(!done && !inputEOF(input)){ + if(peek == '\''){ + peek = getrune(input); + if(peek != '\''){ + *b = 0; + done = 1; + ungetrune(input); + }else{ + *b++ = '\''; + peek = getrune(input); + } + }else{ + *b++ = peek; + peek = getrune(input); + } } - if(!done){ - offset = offset - (b-buf); + if(!done) goto syntax_error; - } stmt->toks[stmt->ntoks].tag = ArrayTag; if(runestrlen(buf) == 1) stmt->toks[stmt->ntoks].array = mkscalarrune(buf[0]); @@ -191,14 +232,58 @@ get_digits: }else{ Rune *err; syntax_error: - err = runesmprint("Can't lex: %S", &line[offset]); + err = runesmprint("Can't lex"); free(stmt->toks); free(stmt); throwerror(err, ESyntax); } + /*print("Got token: %S\n", ppdatum(stmt->toks[stmt->ntoks]));*/ stmt->ntoks++; + peek = getrune(input); } end: stmt->toks = realloc(stmt->toks, sizeof(Datum) * stmt->ntoks); return stmt; -}
\ No newline at end of file +} + +int +inputEOF(InputStream *i) +{ + int eof; + if(i->tag == InputTypeBio) + eof = i->last == Beof; + else + eof = i->last == 0; + /*if(eof) print("EOF\n");*/ + return eof; +} + +Rune +getrune(InputStream *i) +{ + Rune r; + if(i->tag == InputTypeBio) + r = Bgetrune(i->bio); + else{ + if(i->string[i->offset] == 0) + r = 0; + else + r = i->string[i->offset++]; + } + /*print("Get rune: '%C' (%d)\n", r, r);*/ + i->last = r; + return r; +} + +void +ungetrune(InputStream *i) +{ + /*print("Unget rune: '%C' (%d)\n", i->last, i->last);*/ + if(inputEOF(i)) + return; + + if(i->tag == InputTypeBio) + Bungetrune(i->bio); + else + i->offset--; +} |