summary | refs | log | tree | commit | diff
path: root/lib/scan_line.c
diff options
context:
space:
mode:
author: Peter Mikkelsen <petermikkelsen10@gmail.com>, 2026-04-19 17:09:06 +0200
committer: Peter Mikkelsen <petermikkelsen10@gmail.com>, 2026-04-19 17:09:06 +0200
commit: c9f1161ecb323c7872559dd40c56d691dbd5959f (patch)
tree: bc6b974c56d55e11a78cc10bd7da399e54d307cd /lib/scan_line.c
parent: 984ca5a2330ce29b62892321f258d5a0afb0091c (diff)
Start working on parsing/scanning.
Too many changes to list them all individually.
Diffstat (limited to 'lib/scan_line.c')
-rw-r--r-- lib/scan_line.c 161
1 files changed, 161 insertions, 0 deletions
diff --git a/lib/scan_line.c b/lib/scan_line.c
new file mode 100644
index 0000000..811c7cb
--- /dev/null
+++ b/lib/scan_line.c
@@ -0,0 +1,161 @@
+/* Aplwc - A Programming Language With Constraints
+ *
+ * Copyright (C) 2026 Peter Mikkelsen <petermikkelsen10@gmail.com>
+ *
+ * This file is part of aplwc.
+ *
+ * Aplwc is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Aplwc is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with aplwc. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <aplwc.h>
+#include "aplwc_internal.h"
+
+/* Forward declarations of the file-local scanner helpers. */
+static void scan(struct aplwc_eval_context *context);
+static struct aplwc_token *emit(struct aplwc_eval_context *context, enum aplwc_token_tag tag, size_t length);
+static bool at_end(struct aplwc_eval_context *context);
+static void skip_whitespace(struct aplwc_eval_context *context);
+static char peek(struct aplwc_eval_context *context, size_t offset);
+static void consume(struct aplwc_eval_context *context, size_t n);
+static void give_up(struct aplwc_eval_context *context);
+static bool scan_syscmd(struct aplwc_eval_context *context);
+
+/* Append LINE to the text buffered in CONTEXT and rescan the whole buffer.
+ *
+ * When the context already holds text, the new line is joined to it with a
+ * single '\n'; otherwise the line becomes the entire buffer.  Tokens left
+ * over from a previous scan are freed before the scanner runs again.
+ */
+void
+aplwc_scan_line(struct aplwc_eval_context *context, const char *line)
+{
+	size_t add = strlen(line) + 1;	/* bytes to copy, terminator included */
+	size_t at = 0;			/* where LINE starts inside the buffer */
+
+	if(!context->text){
+		context->text = context->aplwc->alloc(add);
+	}else{
+		size_t old = strlen(context->text);
+
+		/* room for: old text, '\n' separator, new line, terminator */
+		context->text = context->aplwc->realloc(context->text, old + 1 + add);
+		context->text[old] = '\n';
+		at = old + 1;
+	}
+	memcpy(context->text + at, line, add);
+
+	/* Drop the previous token list; scan() builds a fresh one. */
+	if(context->tokens){
+		size_t i = 0;
+
+		while(i < context->n_tokens){
+			context->aplwc->free(context->tokens[i]);
+			i++;
+		}
+		context->aplwc->free(context->tokens);
+	}
+
+	scan(context);
+}
+
+/* Tokenize all of context->text, starting over from offset 0.
+ *
+ * The token list is reset first.  Blanks between tokens are skipped.  A
+ * system command is only attempted while no token has been emitted yet;
+ * anything that cannot be scanned is handed to give_up(), which turns the
+ * rest of the text into a single error token.
+ */
+static void
+scan(struct aplwc_eval_context *context)
+{
+	context->tokens = NULL;
+	context->n_tokens = 0;
+	context->length = strlen(context->text);
+	context->offset = 0;
+
+	while(!at_end(context)){
+		skip_whitespace(context);
+		if(at_end(context))
+			break;
+
+		/* Only the very first token may be a system command. */
+		if(context->n_tokens == 0 && scan_syscmd(context))
+			continue;
+
+		give_up(context);
+	}
+}
+
+/* Append a new token of kind TAG covering LENGTH bytes at the current offset.
+ *
+ * The token is zero-filled, then given its tag and an inclusive
+ * [offset_start, offset_end] range.  The scanner offset is advanced past the
+ * token afterwards.  Returns the newly allocated token, which is also stored
+ * at the end of context->tokens.
+ */
+static struct aplwc_token *
+emit(struct aplwc_eval_context *context, enum aplwc_token_tag tag, size_t length)
+{
+	struct aplwc_token *tok;
+
+	tok = context->aplwc->alloc(sizeof(*tok));
+
+	/* Grow the token array by one slot and store the new token in it. */
+	context->n_tokens += 1;
+	context->tokens = context->aplwc->realloc(context->tokens, context->n_tokens * sizeof(context->tokens[0]));
+	context->tokens[context->n_tokens - 1] = tok;
+
+	memset(tok, 0, sizeof(*tok));
+	tok->offset_start = context->offset;
+	tok->offset_end = tok->offset_start + length - 1;	/* inclusive end */
+	tok->tag = tag;
+
+	consume(context, length);
+	return tok;
+}
+
+/* Report whether the scanner offset has reached the length of the text. */
+static bool
+at_end(struct aplwc_eval_context *context)
+{
+	bool done = (context->length == context->offset);
+
+	return done;
+}
+
+/* Advance the offset past any run of spaces and tabs.
+ * Newlines are not skipped.
+ */
+static void
+skip_whitespace(struct aplwc_eval_context *context)
+{
+	char c = peek(context, 0);
+
+	while(c == ' ' || c == '\t'){
+		consume(context, 1);
+		c = peek(context, 0);
+	}
+}
+
+/* Return the character OFFSET bytes ahead of the current scanner offset,
+ * or 0 when that position lies at or beyond the end of the text.
+ */
+static char
+peek(struct aplwc_eval_context *context, size_t offset)
+{
+	size_t pos = context->offset + offset;
+
+	if(pos >= context->length)
+		return 0;
+	return context->text[pos];
+}
+
+/* Advance the scanner offset by N bytes.  No bounds check is done here. */
+static void
+consume(struct aplwc_eval_context *context, size_t n)
+{
+	context->offset = context->offset + n;
+}
+
+/* Emit one APLWC_TOKEN_ERROR token spanning everything from the current
+ * offset to the end of the text, which also moves the offset to the end.
+ */
+static void
+give_up(struct aplwc_eval_context *context)
+{
+	size_t rest = context->length - context->offset;
+
+	emit(context, APLWC_TOKEN_ERROR, rest);
+}
+
+/* Try to scan a system command at the current offset.
+ *
+ * A system command starts with ')'.  The command word runs up to the next
+ * space, tab, newline, or the end of the text, and is emitted as an
+ * APLWC_TOKEN_SYSCMD token.  After skipping blanks, whatever remains on the
+ * same line is emitted as one APLWC_TOKEN_SYSCMD_ARGS token, unless it is
+ * empty.
+ *
+ * Returns false, without consuming anything, when the current character is
+ * not ')'; returns true once the command (and any arguments) were emitted.
+ */
+static bool
+scan_syscmd(struct aplwc_eval_context *context)
+{
+	if(peek(context, 0) != ')')
+		return false;
+
+	size_t len;
+	char c;
+
+	/* Scan the syscmd itself.  Stop at tab and newline as well as space,
+	 * matching what skip_whitespace() and the argument scan below treat
+	 * as separators; otherwise ")cmd\targs" or ")cmd\nnext" would fold
+	 * the following text into the command word.
+	 */
+	for(len = 1; ; len++){
+		c = peek(context, len);
+		if(c == 0 || c == ' ' || c == '\t' || c == '\n')
+			break;
+	}
+	emit(context, APLWC_TOKEN_SYSCMD, len);
+
+	/* Skip the leading whitespace */
+	skip_whitespace(context);
+
+	/* Take the rest of the line as arguments */
+	for(len = 0; ; len++){
+		c = peek(context, len);
+		if(c == 0 || c == '\n')
+			break;
+	}
+	if(len > 0)
+		emit(context, APLWC_TOKEN_SYSCMD_ARGS, len);
+
+	return true;
+}