#include #include #include #include "apl9.h" enum { InputTypeBio, InputTypeString, }; typedef struct InputStream InputStream; struct InputStream { int tag; int offset; Rune last; Biobuf *bio; Rune *string; }; int inputEOF(InputStream *); Rune getrune(InputStream *); void ungetrune(InputStream *); Statement *lexline(InputStream *, int); Statement * lexlinebio(Biobuf *bio, int toplevel) { InputStream in; in.tag = InputTypeBio; in.offset = 0; in.last = 0; in.bio = bio; in.string = nil; return lexline(&in, toplevel); } Statement * lexlinestr(Rune *str, int toplevel) { InputStream in; in.tag = InputTypeString; in.offset = 0; in.last = 0; in.bio = nil; in.string = str; return lexline(&in, toplevel); } Statement * lexline(InputStream *input, int toplevel) { Statement *stmt = emalloc(sizeof(Statement)); stmt->ntoks = 0; stmt->toks = mallocz(sizeof(Datum) * MAX_LINE_TOKENS, 1); stmt->guard = nil; stmt->next = nil; Rune peek = getrune(input); while(!inputEOF(input) && (peek != '\n' || toplevel == 0)){ Rune *p; if(isspacerune(peek) && peek != '\n'){ peek = getrune(input); continue; }else if(runestrchr(L"←⋄\n⍝⍬", peek)){ switch(peek){ case L'←': stmt->toks[stmt->ntoks].tag = ArrowTag; break; case L'\n': case L'⋄': if(stmt->ntoks == 0) stmt = lexline(input, toplevel); else stmt->next = lexline(input, toplevel); goto end; case L'⍝': goto end; case L'⍬': stmt->toks[stmt->ntoks].tag = ArrayTag; stmt->toks[stmt->ntoks].array = allocarray(AtypeInt, 1, 0); stmt->toks[stmt->ntoks].array->shape[0] = 0; break; } }else if(!toplevel && peek == ':'){ Rune buf[MAX_LINE_LENGTH]; Rune *p = buf; while((peek = getrune(input)) != L'⋄' && peek != '\n' && !inputEOF(input)) *p++ = peek; *p = 0; ungetrune(input); stmt->guard = lexlinestr(buf, toplevel); stmt->ntoks--; }else if(peek == '{'){ int unclosed = 1; Rune buf[MAX_LINE_LENGTH]; Rune *p = buf; while(((peek = getrune(input)) != '}' || unclosed > 1) && !inputEOF(input)){ if(peek == '{') unclosed++; else if(peek == '}') unclosed--; *p++ = peek; } if(peek != '}') goto syntax_error; *p = 0; stmt->toks[stmt->ntoks].tag = FunctionTag; stmt->toks[stmt->ntoks].func.type = FunctypeDfn; stmt->toks[stmt->ntoks].func.dfn = runestrdup(buf); }else if(peek == '('){ int unclosed = 1; Rune buf[MAX_LINE_LENGTH]; Rune *p = buf; while(((peek = getrune(input)) != ')' || unclosed > 1) && !inputEOF(input) && peek != '\n'){ if(peek == '(') unclosed++; else if(peek == ')') unclosed--; *p++ = peek; } if(peek != ')') goto syntax_error; *p = 0; stmt->toks[stmt->ntoks].tag = LParTag; stmt->toks[stmt->ntoks].stmt = *lexlinestr(buf, toplevel); stmt->ntoks++; stmt->toks[stmt->ntoks].tag = RParTag; }else if(p = runestrchr(primfuncnames, peek)){ stmt->toks[stmt->ntoks].tag = FunctionTag; stmt->toks[stmt->ntoks].func.type = FunctypePrim; stmt->toks[stmt->ntoks].func.code = p-primfuncnames; }else if(p = runestrchr(primmonopnames, peek)){ stmt->toks[stmt->ntoks].tag = MonadicOpTag; stmt->toks[stmt->ntoks].operator.type = OperatortypePrim; stmt->toks[stmt->ntoks].operator.dyadic = 0; stmt->toks[stmt->ntoks].operator.code = p-primmonopnames; }else if(p = runestrchr(primdyadopnames, peek)){ stmt->toks[stmt->ntoks].tag = DyadicOpTag; stmt->toks[stmt->ntoks].operator.type = OperatortypePrim; stmt->toks[stmt->ntoks].operator.dyadic = 1; stmt->toks[stmt->ntoks].operator.code = p-primdyadopnames; }else if(p = runestrchr(primhybridnames, peek)){ stmt->toks[stmt->ntoks].tag = HybridTag; stmt->toks[stmt->ntoks].hybrid = p-primhybridnames; }else if(isdigitrune(peek) || peek == L'¯'){ char buf[64]; char *p = buf; int floating = 0; if(peek == L'¯'){ *p++ = '-'; peek = getrune(input); } get_digits: while(isdigitrune(peek)){ p += runetochar(p, &peek); peek = getrune(input); } if(!floating && peek == '.'){ p += runetochar(p, &peek); peek = getrune(input); floating = 1; goto get_digits; } *p = 0; ungetrune(input); stmt->toks[stmt->ntoks].tag = ArrayTag; stmt->toks[stmt->ntoks].array = floating ? mkscalarfloat(atof(buf)) : mkscalarint(atoll(buf)); }else if(runestrchr(L"⍺⍵", peek)){ Rune name[2] = {peek, 0}; stmt->toks[stmt->ntoks].tag = NameTag; stmt->toks[stmt->ntoks].symbol = getsym(name); }else if(isalpharune(peek)){ Rune buf[64]; Rune *p = buf; while(isalpharune(peek) || isdigitrune(peek)){ *p++ = peek; peek = getrune(input); } *p = 0; ungetrune(input); stmt->toks[stmt->ntoks].tag = NameTag; stmt->toks[stmt->ntoks].symbol = getsym(buf); }else if(runestrchr(L"⎕⍞", peek)){ /* quad names */ Rune buf[64]; Rune *p = buf; *p++ = peek; peek = getrune(input); while(isalpharune(peek)){ *p++ = toupperrune(peek); peek = getrune(input); } *p = 0; ungetrune(input); int valid = 0; for(int i = 0; quadnames[i].name != nil && !valid; i++){ if(runestrcmp(buf, quadnames[i].name) != 0) continue; valid = 1; stmt->toks[stmt->ntoks] = quadnamedatum(quadnames[i]); } if(!valid) goto syntax_error; }else if(peek == '\''){ Rune buf[1024]; /* stupid limit on literal string lengths */ Rune *b = buf; int done = 0; peek = getrune(input); while(!done && !inputEOF(input)){ if(peek == '\''){ peek = getrune(input); if(peek != '\''){ *b = 0; done = 1; ungetrune(input); }else{ *b++ = '\''; peek = getrune(input); } }else{ *b++ = peek; peek = getrune(input); } } if(!done) goto syntax_error; stmt->toks[stmt->ntoks].tag = ArrayTag; if(runestrlen(buf) == 1) stmt->toks[stmt->ntoks].array = mkscalarrune(buf[0]); else stmt->toks[stmt->ntoks].array = mkrunearray(buf); }else{ Rune *err; syntax_error: err = runesmprint("Can't lex"); free(stmt->toks); free(stmt); throwerror(err, ESyntax); } /*print("Got token: %S\n", ppdatum(stmt->toks[stmt->ntoks]));*/ stmt->ntoks++; peek = getrune(input); } end: stmt->toks = realloc(stmt->toks, sizeof(Datum) * stmt->ntoks); return stmt; } int inputEOF(InputStream *i) { int eof; if(i->tag == InputTypeBio) eof = i->last == Beof; else eof = i->last == 0; /*if(eof) print("EOF\n");*/ return eof; } Rune getrune(InputStream *i) { Rune r; if(i->tag == InputTypeBio) r = Bgetrune(i->bio); else{ if(i->string[i->offset] == 0) r = 0; else r = i->string[i->offset++]; } /*print("Get rune: '%C' (%d)\n", r, r);*/ i->last = r; return r; } void ungetrune(InputStream *i) { /*print("Unget rune: '%C' (%d)\n", i->last, i->last);*/ if(inputEOF(i)) return; if(i->tag == InputTypeBio) Bungetrune(i->bio); else i->offset--; }