#include #include #include #include "apl9.h" enum { InputTypeBio, InputTypeString, }; typedef struct InputStream InputStream; struct InputStream { int tag; int offset; Rune last; Biobuf *bio; Rune *string; }; int inputEOF(InputStream *); Rune getrune(InputStream *); void ungetrune(InputStream *); Statement *lexline(InputStream *, int); Statement * lexlinebio(Biobuf *bio, int toplevel) { InputStream in; in.tag = InputTypeBio; in.offset = 0; in.last = 0; in.bio = bio; in.string = nil; return lexline(&in, toplevel); } Statement * lexlinestr(Rune *str, int toplevel) { InputStream in; in.tag = InputTypeString; in.offset = 0; in.last = 0; in.bio = nil; in.string = str; return lexline(&in, toplevel); } Statement * lexline(InputStream *input, int toplevel) { Statement *stmt = emalloc(sizeof(Statement)); stmt->ntoks = 0; stmt->toks = emallocz(sizeof(Datum*) * MAX_LINE_TOKENS, 1); stmt->guard = nil; stmt->next = nil; Rune peek = getrune(input); while(!inputEOF(input) && (peek != '\n' || toplevel == 0)){ Rune *p; if(isspacerune(peek) && peek != '\n'){ peek = getrune(input); continue; }else if(runestrchr(L"←⋄\n⍝⍬", peek)){ switch(peek){ case L'←': stmt->toks[stmt->ntoks] = allocdatum(ArrowTag, 0); break; case L'\n': case L'⋄': if(stmt->ntoks == 0) stmt = lexline(input, toplevel); else{ stmt->next = lexline(input, toplevel); if(stmt->next && stmt->next->ntoks == 0){ // Prevent empty statements freestatement(*(stmt->next)); free(stmt->next); stmt->next = nil; } } goto end; case L'⍝': while(peek != '\n' && !inputEOF(input)) peek = getrune(input); continue; case L'⍬': stmt->toks[stmt->ntoks] = allocdatum(ArrayTag, 0); stmt->toks[stmt->ntoks]->array = allocarray(AtypeInt, 1, 0); stmt->toks[stmt->ntoks]->array->shape[0] = 0; break; } }else if(!toplevel && peek == ':'){ Rune buf[MAX_LINE_LENGTH]; Rune *p = buf; int errorguard = 0; if(getrune(input) == ':') errorguard = 1; else ungetrune(input); while((peek = getrune(input)) != L'⋄' && peek != '\n' && !inputEOF(input)) *p++ = peek; *p = 0; ungetrune(input); stmt->guard = lexlinestr(buf, toplevel); stmt->errorguard = errorguard; stmt->ntoks--; }else if(peek == '{'){ int unclosed = 1; int oplevel = 0; /* 1 = monadic operator, 2 = dyadic operator */ Rune buf[MAX_LINE_LENGTH]; Rune *p = buf; while(((peek = getrune(input)) != '}' || unclosed > 1) && !inputEOF(input)){ if(unclosed == 1 && peek == L'⍶' && oplevel == 0) oplevel = 1; else if(unclosed == 1 && peek == L'⍹') oplevel = 2; if(peek == '{') unclosed++; else if(peek == '}') unclosed--; *p++ = peek; } if(peek != '}') goto syntax_error; *p = 0; if(oplevel == 0){ stmt->toks[stmt->ntoks] = allocdatum(FunctionTag, 0); stmt->toks[stmt->ntoks]->func.type = FunctypeDfn; stmt->toks[stmt->ntoks]->func.dfn = runestrdup(buf); stmt->toks[stmt->ntoks]->func.scope = getcurrentdfn(); }else{ stmt->toks[stmt->ntoks] = allocdatum(oplevel == 1 ? MonadicOpTag : DyadicOpTag, 0); stmt->toks[stmt->ntoks]->operator.type = OperatortypeDop; stmt->toks[stmt->ntoks]->operator.dyadic = oplevel == 2; stmt->toks[stmt->ntoks]->operator.dop = runestrdup(buf); stmt->toks[stmt->ntoks]->operator.scope = getcurrentdfn(); } }else if(peek == '('){ int unclosed = 1; Rune buf[MAX_LINE_LENGTH]; Rune *p = buf; while(((peek = getrune(input)) != ')' || unclosed > 1) && !inputEOF(input) && peek != '\n'){ if(peek == '(') unclosed++; else if(peek == ')') unclosed--; *p++ = peek; } if(peek != ')') goto syntax_error; *p = 0; stmt->toks[stmt->ntoks] = allocdatum(LParTag, 0); stmt->toks[stmt->ntoks]->stmt = *lexlinestr(buf, toplevel); stmt->ntoks++; stmt->toks[stmt->ntoks] = allocdatum(RParTag, 0); }else if(p = runestrchr(primfuncnames, peek)){ stmt->toks[stmt->ntoks] = allocdatum(FunctionTag, 0); stmt->toks[stmt->ntoks]->func.type = FunctypePrim; stmt->toks[stmt->ntoks]->func.code = p-primfuncnames; }else if(p = runestrchr(primmonopnames, peek)){ stmt->toks[stmt->ntoks] = allocdatum(MonadicOpTag, 0); stmt->toks[stmt->ntoks]->operator.type = OperatortypePrim; stmt->toks[stmt->ntoks]->operator.dyadic = 0; stmt->toks[stmt->ntoks]->operator.code = p-primmonopnames; }else if(p = runestrchr(primdyadopnames, peek)){ stmt->toks[stmt->ntoks] = allocdatum(DyadicOpTag, 0); stmt->toks[stmt->ntoks]->operator.type = OperatortypePrim; stmt->toks[stmt->ntoks]->operator.dyadic = 1; stmt->toks[stmt->ntoks]->operator.code = p-primdyadopnames; }else if(p = runestrchr(primhybridnames, peek)){ stmt->toks[stmt->ntoks] = allocdatum(HybridTag, 0); stmt->toks[stmt->ntoks]->hybrid = p-primhybridnames; }else if(isdigitrune(peek) || peek == L'¯'){ char buf[64]; char *p = buf; int floating = 0; if(peek == L'¯'){ *p++ = '-'; peek = getrune(input); } get_digits: while(isdigitrune(peek)){ p += runetochar(p, &peek); peek = getrune(input); } if(!floating && peek == '.'){ p += runetochar(p, &peek); peek = getrune(input); floating = 1; goto get_digits; } *p = 0; ungetrune(input); stmt->toks[stmt->ntoks] = allocdatum(ArrayTag, 0); stmt->toks[stmt->ntoks]->array = floating ? mkscalarfloat(atof(buf)) : mkscalarint(strtoull(buf, nil, 0)); }else if(runestrchr(L"⍺⍵⍶⍹", peek)){ Rune name[2] = {peek, 0}; stmt->toks[stmt->ntoks] = allocdatum(NameTag, 0); stmt->toks[stmt->ntoks]->name = runestrdup(name); }else if(isalpharune(peek) || peek == '_'){ Rune buf[64]; Rune *p = buf; while(isalpharune(peek) || isdigitrune(peek) || peek == '_'){ *p++ = peek; peek = getrune(input); } *p = 0; ungetrune(input); stmt->toks[stmt->ntoks] = allocdatum(NameTag, 0); stmt->toks[stmt->ntoks]->name = runestrdup(buf); }else if(runestrchr(L"⎕⍞", peek)){ /* quad names */ Rune buf[64]; Rune *p = buf; *p++ = peek; peek = getrune(input); while(isalpharune(peek) || isdigitrune(peek)){ *p++ = toupperrune(peek); peek = getrune(input); } *p = 0; ungetrune(input); int valid = 0; for(int i = 0; quadnames[i].name != nil && !valid; i++){ if(runestrcmp(buf, quadnames[i].name) != 0) continue; valid = 1; stmt->toks[stmt->ntoks] = quadnamedatum(quadnames[i]); } if(!valid) goto syntax_error; }else if(peek == '\''){ Rune buf[1024]; /* stupid limit on literal string lengths */ Rune *b = buf; int done = 0; peek = getrune(input); while(!done && !inputEOF(input)){ if(peek == '\''){ peek = getrune(input); if(peek != '\''){ *b = 0; done = 1; ungetrune(input); }else{ *b++ = '\''; peek = getrune(input); } }else{ *b++ = peek; peek = getrune(input); } } if(!done) goto syntax_error; stmt->toks[stmt->ntoks] = allocdatum(ArrayTag, 0); if(runestrlen(buf) == 1) stmt->toks[stmt->ntoks]->array = mkscalarrune(buf[0]); else stmt->toks[stmt->ntoks]->array = mkrunearray(buf); }else{ Rune *err; syntax_error: /* eat the rest of the line */ while(getrune(input) != '\n'); err = runesmprint("Can't lex"); free(stmt->toks); free(stmt); throwerror(err, ESyntax); } // print("Got token: %S\n", ppdatum(stmt->toks[stmt->ntoks])); stmt->ntoks++; peek = getrune(input); } end: stmt->toks = erealloc(stmt->toks, sizeof(Datum) * stmt->ntoks); return stmt; } int inputEOF(InputStream *i) { int eof; if(i->tag == InputTypeBio) eof = i->last == Beof; else eof = i->last == 0; /*if(eof) print("EOF\n");*/ return eof; } Rune getrune(InputStream *i) { Rune r; if(i->tag == InputTypeBio) r = Bgetrune(i->bio); else{ if(i->string[i->offset] == 0) r = 0; else r = i->string[i->offset++]; } /* print("Get rune: '%C' (%d)\n", r, r); */ i->last = r; return r; } void ungetrune(InputStream *i) { /* print("Unget rune: '%C' (%d)\n", i->last, i->last); */ if(inputEOF(i)) return; if(i->tag == InputTypeBio) Bungetrune(i->bio); else i->offset--; }