summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Peter Mikkelsen <petermikkelsen10@gmail.com> 2022-01-24 00:00:05 +0000
committer: Peter Mikkelsen <petermikkelsen10@gmail.com> 2022-01-24 00:00:05 +0000
commit: 464110afe0599efa5b876eb398769cdbf2a0c1df (patch)
tree: c572fdeb3773c20d12a9f104292b15f20d885143
parent: 9a938d3ce26b2d3728d791c0f858acdbd50223b5 (diff)
Rework the lexer to lex from either a string or a bio. This allows multiline dfns.
-rw-r--r--  apl9.h       5
-rw-r--r--  functions.c  2
-rw-r--r--  lexer.c      259
-rw-r--r--  main.c       15
-rw-r--r--  quadnames.c  17
5 files changed, 197 insertions, 101 deletions
diff --git a/apl9.h b/apl9.h
index 7f194d0..b6c1138 100644
--- a/apl9.h
+++ b/apl9.h
@@ -188,7 +188,7 @@ struct DfnFrame
/* Function prototypes for the different source files */
/* main.c */
-Datum *evalline(Rune *, int);
+Datum *evalline(Rune *, Biobuf *, int);
Rune *prompt(Rune *);
/* print.c */
@@ -199,7 +199,8 @@ Rune *ppoperator(Operator);
Rune *ppfunction(Function);
/* lexer.c */
-Statement *lexline(Rune *, int);
+Statement *lexlinebio(Biobuf *, int);
+Statement *lexlinestr(Rune *, int);
/* array.c */
Array *mkscalarint(vlong);
diff --git a/functions.c b/functions.c
index c8bff78..207daeb 100644
--- a/functions.c
+++ b/functions.c
@@ -144,7 +144,7 @@ runfunc(Function f, Array *left, Array *right)
omega->undefined = 0;
incref(right);
- Datum *dfnres = evalline(f.dfn, 0);
+ Datum *dfnres = evalline(f.dfn, nil, 0);
popdfnframe();
result = (*dfnres).array; /* TODO what if the evaluation failed */
}else if(f.type == FunctypePrim){
diff --git a/lexer.c b/lexer.c
index f56a623..0515727 100644
--- a/lexer.c
+++ b/lexer.c
@@ -4,26 +4,76 @@
#include "apl9.h"
+enum {
+ InputTypeBio,
+ InputTypeString,
+};
+
+typedef struct InputStream InputStream;
+struct InputStream
+{
+ int tag;
+ int offset;
+ Rune last;
+ Biobuf *bio;
+ Rune *string;
+};
+
+int inputEOF(InputStream *);
+Rune getrune(InputStream *);
+void ungetrune(InputStream *);
+
+Statement *lexline(InputStream *, int);
+
+Statement *
+lexlinebio(Biobuf *bio, int toplevel)
+{
+ InputStream in;
+ in.tag = InputTypeBio;
+ in.offset = 0;
+ in.last = 0;
+ in.bio = bio;
+ in.string = nil;
+ return lexline(&in, toplevel);
+}
+
+Statement *
+lexlinestr(Rune *str, int toplevel)
+{
+ InputStream in;
+ in.tag = InputTypeString;
+ in.offset = 0;
+ in.last = 0;
+ in.bio = nil;
+ in.string = str;
+ return lexline(&in, toplevel);
+}
+
Statement *
-lexline(Rune *line, int toplevel)
+lexline(InputStream *input, int toplevel)
{
- int offset = 0;
- int len = runestrlen(line);
Statement *stmt = emalloc(sizeof(Statement));
stmt->ntoks = 0;
stmt->toks = mallocz(sizeof(Datum) * MAX_LINE_TOKENS, 1);
stmt->guard = nil;
stmt->next = nil;
- while(offset < len){
+ Rune peek = getrune(input);
+ while(!inputEOF(input) && (peek != '\n' || toplevel == 0)){
Rune *p;
- if(isspacerune(line[offset])){
- offset++;
+ if(isspacerune(peek) && peek != '\n'){
+ peek = getrune(input);
continue;
- }else if(runestrchr(L"←⋄⍝⍬", line[offset])){
- switch(line[offset]){
+ }else if(runestrchr(L"←⋄\n⍝⍬", peek)){
+ switch(peek){
case L'←': stmt->toks[stmt->ntoks].tag = ArrowTag; break;
- case L'⋄': stmt->next = lexline(&line[offset+1], toplevel); goto end;
+ case L'\n':
+ case L'⋄':
+ if(stmt->ntoks == 0)
+ stmt = lexline(input, toplevel);
+ else
+ stmt->next = lexline(input, toplevel);
+ goto end;
case L'⍝': goto end;
case L'⍬':
stmt->toks[stmt->ntoks].tag = ArrayTag;
@@ -31,127 +81,117 @@ lexline(Rune *line, int toplevel)
stmt->toks[stmt->ntoks].array->shape[0] = 0;
break;
}
- offset++;
- }else if(!toplevel && line[offset] == ':'){
+ }else if(!toplevel && peek == ':'){
Rune buf[MAX_LINE_LENGTH];
Rune *p = buf;
- offset++;
- while(line[offset] != L'⋄' && offset < len){
- *p = line[offset];
- p++;
- offset++;
- }
+ while((peek = getrune(input)) != L'⋄' && peek != '\n' && !inputEOF(input))
+ *p++ = peek;
*p = 0;
- stmt->guard = lexline(buf, toplevel);
+ ungetrune(input);
+ stmt->guard = lexlinestr(buf, toplevel);
stmt->ntoks--;
- }else if(line[offset] == '{'){
+ }else if(peek == '{'){
int unclosed = 1;
Rune buf[MAX_LINE_LENGTH];
Rune *p = buf;
- offset++;
- while((line[offset] != '}' || unclosed > 1) && offset < len){
- if(line[offset] == '{')
+ while(((peek = getrune(input)) != '}' || unclosed > 1) && !inputEOF(input)){
+ if(peek == '{')
unclosed++;
- else if(line[offset] == '}')
+ else if(peek == '}')
unclosed--;
- *p = line[offset];
- p++;
- offset++;
+ *p++ = peek;
}
- if(line[offset] != '}')
+ if(peek != '}')
goto syntax_error;
*p = 0;
- offset++;
stmt->toks[stmt->ntoks].tag = FunctionTag;
stmt->toks[stmt->ntoks].func.type = FunctypeDfn;
stmt->toks[stmt->ntoks].func.dfn = runestrdup(buf);
- }else if(line[offset] == '('){
+ }else if(peek == '('){
int unclosed = 1;
Rune buf[MAX_LINE_LENGTH];
Rune *p = buf;
- offset++;
- while((line[offset] != ')' || unclosed > 1) && offset < len){
- if(line[offset] == '(')
+ while(((peek = getrune(input)) != ')' || unclosed > 1) && !inputEOF(input) && peek != '\n'){
+ if(peek == '(')
unclosed++;
- else if(line[offset] == ')')
+ else if(peek == ')')
unclosed--;
- *p = line[offset];
- p++;
- offset++;
+ *p++ = peek;
}
- if(line[offset] != ')')
+ if(peek != ')')
goto syntax_error;
*p = 0;
- offset++;
stmt->toks[stmt->ntoks].tag = LParTag;
- stmt->toks[stmt->ntoks].stmt = *lexline(buf, toplevel);
+ stmt->toks[stmt->ntoks].stmt = *lexlinestr(buf, toplevel);
stmt->ntoks++;
stmt->toks[stmt->ntoks].tag = RParTag;
- }else if(p = runestrchr(primfuncnames, line[offset])){
+ }else if(p = runestrchr(primfuncnames, peek)){
stmt->toks[stmt->ntoks].tag = FunctionTag;
stmt->toks[stmt->ntoks].func.type = FunctypePrim;
stmt->toks[stmt->ntoks].func.code = p-primfuncnames;
- offset++;
- }else if(p = runestrchr(primmonopnames, line[offset])){
+ }else if(p = runestrchr(primmonopnames, peek)){
stmt->toks[stmt->ntoks].tag = MonadicOpTag;
stmt->toks[stmt->ntoks].operator.type = OperatortypePrim;
stmt->toks[stmt->ntoks].operator.dyadic = 0;
stmt->toks[stmt->ntoks].operator.code = p-primmonopnames;
- offset++;
- }else if(p = runestrchr(primdyadopnames, line[offset])){
+ }else if(p = runestrchr(primdyadopnames, peek)){
stmt->toks[stmt->ntoks].tag = DyadicOpTag;
stmt->toks[stmt->ntoks].operator.type = OperatortypePrim;
stmt->toks[stmt->ntoks].operator.dyadic = 1;
stmt->toks[stmt->ntoks].operator.code = p-primdyadopnames;
- offset++;
- }else if(p = runestrchr(primhybridnames, line[offset])){
+ }else if(p = runestrchr(primhybridnames, peek)){
stmt->toks[stmt->ntoks].tag = HybridTag;
stmt->toks[stmt->ntoks].hybrid = p-primhybridnames;
- offset++;
- }else if(isdigitrune(line[offset]) || (line[offset] == L'¯' && isdigitrune(line[offset+1]))){
+ }else if(isdigitrune(peek) || peek == L'¯'){
char buf[64];
char *p = buf;
int floating = 0;
- if(line[offset] == L'¯'){
+ if(peek == L'¯'){
*p++ = '-';
- offset++;
+ peek = getrune(input);
}
get_digits:
- while(isdigitrune(line[offset]))
- p += runetochar(p, &line[offset++]);
- if(!floating && line[offset] == '.'){
- p += runetochar(p, &line[offset++]);
+ while(isdigitrune(peek)){
+ p += runetochar(p, &peek);
+ peek = getrune(input);
+ }
+ if(!floating && peek == '.'){
+ p += runetochar(p, &peek);
+ peek = getrune(input);
floating = 1;
goto get_digits;
}
*p = 0;
+ ungetrune(input);
stmt->toks[stmt->ntoks].tag = ArrayTag;
stmt->toks[stmt->ntoks].array = floating ? mkscalarfloat(atof(buf)) : mkscalarint(atoll(buf));
- }else if(runestrchr(L"⍺⍵", line[offset])){
- Rune *name = L"?";
- name[0] = line[offset];
+ }else if(runestrchr(L"⍺⍵", peek)){
+ Rune name[2] = {peek, 0};
stmt->toks[stmt->ntoks].tag = NameTag;
stmt->toks[stmt->ntoks].symbol = getsym(name);
- offset++;
- }else if(isalpharune(line[offset])){
+ }else if(isalpharune(peek)){
Rune buf[64];
Rune *p = buf;
- while(isalpharune(line[offset]) || isdigitrune(line[offset])){
- *p = line[offset];
- p++;
- offset++;
+ while(isalpharune(peek) || isdigitrune(peek)){
+ *p++ = peek;
+ peek = getrune(input);
}
*p = 0;
+ ungetrune(input);
stmt->toks[stmt->ntoks].tag = NameTag;
stmt->toks[stmt->ntoks].symbol = getsym(buf);
- }else if(runestrchr(L"⎕⍞", line[offset])){
+ }else if(runestrchr(L"⎕⍞", peek)){
/* quad names */
Rune buf[64];
Rune *p = buf;
- *p++ = line[offset++];
- while(isalpharune(line[offset]))
- *p++ = toupperrune(line[offset++]);
+ *p++ = peek;
+ peek = getrune(input);
+ while(isalpharune(peek)){
+ *p++ = toupperrune(peek);
+ peek = getrune(input);
+ }
*p = 0;
+ ungetrune(input);
int valid = 0;
for(int i = 0; quadnames[i].name != nil && !valid; i++){
if(runestrcmp(buf, quadnames[i].name) != 0)
@@ -159,30 +199,31 @@ get_digits:
valid = 1;
stmt->toks[stmt->ntoks] = quadnamedatum(quadnames[i]);
}
- if(!valid){
- offset -= runestrlen(buf);
+ if(!valid)
goto syntax_error;
- }
- }else if(line[offset] == '\''){
+ }else if(peek == '\''){
Rune buf[1024]; /* stupid limit on literal string lengths */
Rune *b = buf;
int done = 0;
- offset++;
- while(!done && offset < len){
- if(line[offset] == '\'' && line[offset+1] != '\''){
- *b = 0;
- done = 1;
- }else if(line[offset] == '\'' && line[offset+1] == '\''){
- *b++ = '\'';
- offset++;
- }else
- *b++ = line[offset];
- offset++;
+ peek = getrune(input);
+ while(!done && !inputEOF(input)){
+ if(peek == '\''){
+ peek = getrune(input);
+ if(peek != '\''){
+ *b = 0;
+ done = 1;
+ ungetrune(input);
+ }else{
+ *b++ = '\'';
+ peek = getrune(input);
+ }
+ }else{
+ *b++ = peek;
+ peek = getrune(input);
+ }
}
- if(!done){
- offset = offset - (b-buf);
+ if(!done)
goto syntax_error;
- }
stmt->toks[stmt->ntoks].tag = ArrayTag;
if(runestrlen(buf) == 1)
stmt->toks[stmt->ntoks].array = mkscalarrune(buf[0]);
@@ -191,14 +232,58 @@ get_digits:
}else{
Rune *err;
syntax_error:
- err = runesmprint("Can't lex: %S", &line[offset]);
+ err = runesmprint("Can't lex");
free(stmt->toks);
free(stmt);
throwerror(err, ESyntax);
}
+ /*print("Got token: %S\n", ppdatum(stmt->toks[stmt->ntoks]));*/
stmt->ntoks++;
+ peek = getrune(input);
}
end:
stmt->toks = realloc(stmt->toks, sizeof(Datum) * stmt->ntoks);
return stmt;
-} \ No newline at end of file
+}
+
+int
+inputEOF(InputStream *i)
+{
+ int eof;
+ if(i->tag == InputTypeBio)
+ eof = i->last == Beof;
+ else
+ eof = i->last == 0;
+ /*if(eof) print("EOF\n");*/
+ return eof;
+}
+
+Rune
+getrune(InputStream *i)
+{
+ Rune r;
+ if(i->tag == InputTypeBio)
+ r = Bgetrune(i->bio);
+ else{
+ if(i->string[i->offset] == 0)
+ r = 0;
+ else
+ r = i->string[i->offset++];
+ }
+ /*print("Get rune: '%C' (%d)\n", r, r);*/
+ i->last = r;
+ return r;
+}
+
+void
+ungetrune(InputStream *i)
+{
+ /*print("Unget rune: '%C' (%d)\n", i->last, i->last);*/
+ if(inputEOF(i))
+ return;
+
+ if(i->tag == InputTypeBio)
+ Bungetrune(i->bio);
+ else
+ i->offset--;
+}
diff --git a/main.c b/main.c
index 1397003..6218fa0 100644
--- a/main.c
+++ b/main.c
@@ -43,8 +43,8 @@ restart:
while(!off){
checkmem("main loop");
- Rune *input = prompt(L"\t");
- Datum *result = evalline(input, 1);
+ print("\t");
+ Datum *result = evalline(nil, stdin, 1);
if(result == nil)
continue;
else{
@@ -75,9 +75,16 @@ prompt(Rune *pr)
}
Datum *
-evalline(Rune *line, int toplevel)
+evalline(Rune *line, Biobuf *bio, int toplevel)
{
- Statement *stmts = lexline(line, toplevel);
+ Statement *stmts;
+ if(line)
+ stmts = lexlinestr(line, toplevel);
+ else if(bio)
+ stmts = lexlinebio(bio, toplevel);
+ else
+ stmts = lexlinebio(stdin, toplevel);
+
Datum *result = eval(stmts, toplevel);
if(result)
return result;
diff --git a/quadnames.c b/quadnames.c
index fa1b247..be543ff 100644
--- a/quadnames.c
+++ b/quadnames.c
@@ -58,8 +58,8 @@ quadnamedatum(QuadnameDef q)
Datum *
getquad(void)
{
- Rune *input = prompt(L"⎕:\n\t");
- Datum *result = evalline(input, 1);
+ print("⎕:\n\t");
+ Datum *result = evalline(nil, nil, 1);
/* TODO check that the expression doesn't fail */
return result;
}
@@ -141,11 +141,14 @@ runfile(Array *a)
if(bio == nil)
return mkscalarint(0);
- char *charcode = Brdstr(bio, Beof, 1);
- Rune *code = runesmprint("%s", charcode);
- evalline(code, 1);
- free(charcode);
- free(code);
+ while(1){
+ Rune r = Bgetrune(bio);
+ Bungetrune(bio);
+ if(r == Beof)
+ break;
+ else
+ evalline(nil, bio, 1);
+ }
Bterm(bio);
return mkscalarint(1);
}