summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Konstantin Kirik (snegovick) <snegovick@uprojects.org> 2025-12-12 03:51:07 +0300
committer Konstantin Kirik (snegovick) <snegovick@uprojects.org> 2025-12-12 03:51:07 +0300
commit a40ef1434889babbd88c9d0c5913c70e96ac2774 (patch)
tree a71e4f3cb1d6056554e3958468ddfcee0300e08e
parent cc280d7e5d07fd61e45825b3850c8baaa4147639 (diff)
Add simple parser for future shell-like lang
-rw-r--r-- mkfile 35
-rw-r--r-- sh92.b 208
-rw-r--r-- sh9parser.b 130
-rw-r--r-- sh9parser.m 28
-rw-r--r-- sh9util.b 26
-rw-r--r-- sh9util.m 8
6 files changed, 435 insertions, 0 deletions
diff --git a/mkfile b/mkfile
new file mode 100644
index 0000000..cf68f33
--- /dev/null
+++ b/mkfile
@@ -0,0 +1,35 @@
+# Build and install rules for sh92.dis and its helper modules
+# sh9util.dis and sh9parser.dis.
+<../../../mkconfig
+
+TARG=sh92.dis\
+	sh9util.dis\
+	sh9parser.dis\
+
+INS=	$ROOT/dis/sh92.dis\
+	$ROOT/dis/sh9/sh9util.dis\
+	$ROOT/dis/sh9/sh9parser.dis\
+
+SYSMODULES=\
+	sys.m\
+
+# Helper modules are installed under $ROOT/dis/sh9.
+DISBIN=$ROOT/dis/sh9
+
+<$ROOT/mkfiles/mkdis
+
+all:V:	$TARG
+
+# NOTE(review): the cp below assumes $DISBIN/sh92.dis has been produced by
+# rules pulled in from mkdis - confirm.
+install:V:	$INS
+	cp $DISBIN/sh92.dis $DISBIN/..
+
+nuke:V:	clean
+	rm -f $INS
+
+clean:V:
+	rm -f *.dis *.sbl
+
+uninstall:V:
+	rm -f $INS
+
+# mkdir -p: a plain mkdir fails once $DISBIN exists, which would abort the
+# && chain and break every install after the first one.
+$ROOT/dis/sh92.dis:	sh92.dis
+	mkdir -p $DISBIN && rm -f $ROOT/dis/sh92.dis && cp sh92.dis $ROOT/dis/sh92.dis
+
+%.dis:	${SYSMODULES:%=$MODDIR/%}
diff --git a/sh92.b b/sh92.b
new file mode 100644
index 0000000..bce58c3
--- /dev/null
+++ b/sh92.b
@@ -0,0 +1,208 @@
+implement Sh92;
+
+include "sys.m";
+include "draw.m";
+include "sh9util.m";
+include "sh9parser.m";
+
+sys: Sys;
+sh9u: Sh9Util;
+sh9p: Sh9Parser;
+
+# Interface of the module implemented by this file; it exports a single
+# function, init, taking a draw context and an argument list.
+Sh92: module {
+ init: fn(nil: ref Draw->Context, nil: list of string);
+};
+
+# A named procedure record. Not referenced elsewhere in this file yet.
+# NOTE(review): "start" presumably locates the procedure body (token or
+# line index) - confirm once it is used.
+ModProc: adt {
+ name: string;
+ start: int;
+};
+
+# A named variable with a string value. Not referenced elsewhere in this
+# file yet.
+ModVar: adt {
+ name: string;
+ val: string;
+};
+
+# Container grouping a list of ModVar and a list of ModProc. Not referenced
+# elsewhere in this file yet.
+ShModule: adt {
+ global_vars: list of ref ModVar;
+ procs: list of ref ModProc;
+};
+
+GrammarNode: import sh9p;
+TokNode: import sh9p;
+mk_tok: import sh9p;
+set_last_tok: import sh9p;
+print_toks: import sh9p;
+parse_toks: import sh9p;
+
+reverse_list: import sh9u;
+to_array: import sh9u;
+
+# Token type tags, stored in TokNode.typ and matched against
+# GrammarNode.expr entries. S_UNKNOWN marks "no token in progress" and has
+# the same value ("UNK") as the S_UNKNOWN constant in sh9parser.b.
+# tokenize in this file emits ID, EQ, SEMIC, DOL, LPAR, RPAR, LCURLY,
+# RCURLY, DQSTR, SQSTR and EOL; the remaining lexical tags (STR, COLON,
+# DQTE, SQTE, SP, TAB) are declared but not produced by it.
+S_UNKNOWN: con "UNK";
+S_ID: con "ID";
+S_STR: con "STR";
+S_EQ: con "EQ";
+S_DOL: con "DOL";
+S_COLON: con "COLON";
+S_SEMIC: con "SEMIC";
+S_LPAR: con "LPAR";
+S_RPAR: con "RPAR";
+S_LCURLY: con "LCURLY";
+S_RCURLY: con "RCURLY";
+S_DQSTR: con "DQSTR";
+S_SQSTR: con "SQSTR";
+S_DQTE: con "DQTE";
+S_SQTE: con "SQTE";
+S_SP: con "SP";
+S_TAB: con "TAB";
+S_EOL: con "EOL";
+
+# Non-terminal tags used on the grammar side; in this file only S_EXPR is
+# referenced (by the grammar built in init).
+S_STMT: con "STMT";
+S_EXPR: con "EXPR";
+S_CALL: con "CALL";
+
+# Returns 1 if a quote character arriving right after tok is unescaped,
+# i.e. tok ends in an even number (possibly zero) of backslashes.
+# Looking only at the last two characters is not enough: in \\\" the
+# first two backslashes form an escaped backslash and the third one
+# escapes the quote, so the string must continue.
+quote_closes(tok: string): int {
+	nbs := 0;
+	for (k := len tok - 1; k >= 0 && tok[k] == '\\'; k--)
+		nbs++;
+	return (nbs % 2) == 0;
+}
+
+# Maps a single-character punctuation string to its token type tag;
+# returns nil for any other character.
+punct_type(c: string): string {
+	typ: string;
+	case c {
+	"=" => typ = S_EQ;
+	";" => typ = S_SEMIC;
+	"$" => typ = S_DOL;
+	"(" => typ = S_LPAR;
+	")" => typ = S_RPAR;
+	"{" => typ = S_LCURLY;
+	"}" => typ = S_RCURLY;
+	* => typ = nil;
+	}
+	return typ;
+}
+
+# Splits one source line into tokens.
+#   line   - text to tokenize
+#   line_n - line number recorded in every produced token
+# Returns the tokens in source order, always terminated by an S_EOL token
+# whose start is len line. Quoted strings keep their surrounding quotes in
+# TokNode.tok; a string left unterminated at end of line is still emitted.
+tokenize(line: string, line_n: int): array of ref TokNode {
+	toks: list of ref TokNode;	# built in reverse, flipped at the end
+	last_tok: TokNode;		# token currently being accumulated
+	last_tok.start = -1;
+	last_tok.line = -1;
+	last_tok.tok = "";
+	last_tok.typ = S_UNKNOWN;
+
+	for (i := 0; i < len line; i++) {
+		c := line[i:i+1];
+
+		# Inside a quoted string every character is taken verbatim; only
+		# an unescaped quote of the opening kind terminates it.
+		if (last_tok.typ == S_DQSTR || last_tok.typ == S_SQSTR) {
+			closer := "\"";
+			if (last_tok.typ == S_SQSTR)
+				closer = "'";
+			# Decide before appending c, while tok still ends with the
+			# characters that precede the quote.
+			closes := c == closer && quote_closes(last_tok.tok);
+			last_tok.tok = last_tok.tok + c;
+			if (closes)
+				(last_tok, toks) = set_last_tok(ref last_tok, toks);
+			continue;
+		}
+
+		case c {
+		" " or "\t" => {
+			# Whitespace only terminates the pending token.
+			(last_tok, toks) = set_last_tok(ref last_tok, toks);
+		};
+		"\"" or "'" => {
+			# Opening quote: flush the pending token, start a string.
+			(last_tok, toks) = set_last_tok(ref last_tok, toks);
+			last_tok.start = i;
+			last_tok.line = line_n;
+			if (c == "\"")
+				last_tok.typ = S_DQSTR;
+			else
+				last_tok.typ = S_SQSTR;
+			last_tok.tok = last_tok.tok + c;
+		};
+		* => {
+			typ := punct_type(c);
+			if (typ != nil) {
+				# Punctuation flushes the pending token and is itself
+				# a complete one-character token.
+				(last_tok, toks) = set_last_tok(ref last_tok, toks);
+				toks = ref mk_tok(i, line_n, c, typ) :: toks;
+			} else {
+				# Any other character starts or extends an identifier.
+				if (last_tok.start == -1) {
+					last_tok.start = i;
+					last_tok.line = line_n;
+					last_tok.typ = S_ID;
+				}
+				last_tok.tok = last_tok.tok + c;
+			}
+		};
+		}
+	}
+	(last_tok, toks) = set_last_tok(ref last_tok, toks);
+	toks = ref mk_tok(i, line_n, "", S_EOL) :: toks;
+	toks = reverse_list(toks);
+	return to_array(toks);
+}
+
+# Callback for the assignment grammar nodes built in init; currently a
+# stub that prints a marker line and ignores toks.
+stmt_assign(toks: array of ref TokNode) {
+ sys->print("ASSIGN STMT\n");
+}
+
+# Callback for the command-call grammar node built in init; currently a
+# stub that prints a marker line and ignores toks.
+stmt_cmd_call(toks: array of ref TokNode) {
+ sys->print("CMD CALL\n");
+}
+
+# Callback for the expression grammar nodes built in init; it prints a
+# marker line and ignores toks.
+empty(toks: array of ref TokNode) {
+ sys->print("EMPTY\n");
+}
+
+# Entry point. Loads the helper modules, builds a small hard-coded grammar,
+# then tokenizes and parses one sample line, printing the tokens and the
+# parser's progress. ctxt and argv are not used.
+# Raises "fail:load" if a helper module cannot be loaded.
+init(ctxt: ref Draw->Context, argv: list of string) {
+	sys = load Sys Sys->PATH;
+
+	# A failed load yields nil; check now so the failure is reported
+	# here instead of as a nil-module fault at the first call.
+	sh9u = load Sh9Util Sh9Util->PATH;
+	if (sh9u == nil) {
+		sys->fprint(sys->fildes(2), "sh92: cannot load %s: %r\n", Sh9Util->PATH);
+		raise "fail:load";
+	}
+	sh9p = load Sh9Parser Sh9Parser->PATH;
+	if (sh9p == nil) {
+		sys->fprint(sys->fildes(2), "sh92: cannot load %s: %r\n", Sh9Parser->PATH);
+		raise "fail:load";
+	}
+
+	# Each grammar node is (token type sequence, replacement type, callback).
+	assign_g_semic: GrammarNode = (array [] of {S_ID, S_EQ, S_EXPR, S_SEMIC}, S_UNKNOWN, stmt_assign);
+	assign_g_eol: GrammarNode = (array [] of {S_ID, S_EQ, S_EXPR, S_EOL}, S_UNKNOWN, stmt_assign);
+	sqstr_expr_g: GrammarNode = (array [] of {S_SQSTR}, S_EXPR, empty);
+	str_expr_g: GrammarNode = (array [] of {S_STR}, S_EXPR, empty);
+	# NOTE(review): this sequence is identical to assign_g_semic, which is
+	# listed earlier in the grammar array below - confirm the intended
+	# command-call pattern.
+	cmd_call_g: GrammarNode = (array [] of {S_ID, S_EQ, S_EXPR, S_SEMIC}, S_UNKNOWN, stmt_cmd_call);
+	grammar: array of ref GrammarNode;
+	grammar = array [] of {ref assign_g_semic, ref assign_g_eol, ref sqstr_expr_g, ref str_expr_g, ref cmd_call_g};
+
+	toks1 := tokenize("A = 'smth \"test\" ';", 0);
+	print_toks(toks1);
+	sys->print("Parse\n");
+	parse_toks(toks1, grammar);
+	sys->print("Parse done\n");
+
+	# toks2 := tokenize("echo \"smth \" \"test\";", 0);
+	# print_toks(toks2);
+	# toks3 := tokenize("if test x\"a\" = x\"b\"; then echo \"1\"; fi", 0);
+	# print_toks(toks3);
+	# toks4 := tokenize("echo 'smth2' 'test';", 0);
+	# print_toks(toks4);
+}
diff --git a/sh9parser.b b/sh9parser.b
new file mode 100644
index 0000000..fcae119
--- /dev/null
+++ b/sh9parser.b
@@ -0,0 +1,130 @@
+implement Sh9Parser;
+
+include "sys.m";
+include "sh9parser.m";
+include "sh9util.m";
+
+# Handle to the Sys module; the functions below call sys->print through it.
+sys: Sys;
+# Type tag meaning "no token"; same value as S_UNKNOWN in sh92.b.
+S_UNKNOWN: con "UNK";
+
+# Handle to the Sh9Util module, the source of the two imports below.
+sh9u: Sh9Util;
+
+reverse_list: import sh9u;
+to_array: import sh9u;
+
+# Prints the node's expression tags separated by spaces on one line,
+# followed by "-> <transform>" unless the transform is S_UNKNOWN.
+GrammarNode.print_expr(gn: self ref GrammarNode) {
+	# This module has no init function, so the Sys handle is loaded on
+	# first use; calling sys->print through a nil handle would fault.
+	if (sys == nil)
+		sys = load Sys Sys->PATH;
+
+	for (i := 0; i < len gn.expr; i++)
+		sys->print("%s ", gn.expr[i]);
+
+	if (gn.transform == S_UNKNOWN)
+		sys->print("\n");
+	else
+		sys->print("-> %s\n", gn.transform);
+}
+
+# Builds a TokNode value from its four fields.
+#   start - offset of the token within its line
+#   line  - line number
+#   tok   - token text
+#   typ   - token type tag
+mk_tok(start: int, line: int, tok: string, typ: string) : TokNode {
+	# Field order of TokNode is (start, line, tok, typ).
+	return TokNode(start, line, tok, typ);
+}
+
+# Flushes the token being accumulated.
+# If last_tok has a type other than S_UNKNOWN it is pushed onto the front
+# of toks and a blank token (typ S_UNKNOWN, start -1, line -1, empty text)
+# is returned in its place; otherwise last_tok and toks come back unchanged.
+# Returns (token to keep accumulating into, updated list).
+set_last_tok(last_tok: ref TokNode, toks: list of ref TokNode): (TokNode, list of ref TokNode) {
+	# Nothing pending: hand back a copy of the current state.
+	if (last_tok.typ == S_UNKNOWN)
+		return (*last_tok, toks);
+
+	blank := TokNode(-1, -1, "", S_UNKNOWN);
+	return (blank, last_tok :: toks);
+}
+
+# Prints one line per token in the form "[index/count] TYPE (text)".
+print_toks(toks: array of ref TokNode) {
+	# This module has no init function, so the Sys handle is loaded on
+	# first use; calling sys->print through a nil handle would fault.
+	if (sys == nil)
+		sys = load Sys Sys->PATH;
+
+	lt := len toks;
+	for (i := 0; i < lt; i++) {
+		tok := toks[i];
+		sys->print("[%d/%d] %s (%s)\n", i, lt, tok.typ, tok.tok);
+	}
+}
+
+# Prints only the type tags of toks, space separated, on a single line.
+print_toks_short(toks: array of ref TokNode) {
+	# This module has no init function, so the Sys handle is loaded on
+	# first use; calling sys->print through a nil handle would fault.
+	if (sys == nil)
+		sys = load Sys Sys->PATH;
+
+	for (i := 0; i < len toks; i++)
+		sys->print("%s ", toks[i].typ);
+	sys->print("\n");
+}
+
+# Tests whether toks begins with the type sequence gn.expr.
+# Returns 1 on a match and 0 otherwise, including when toks is shorter
+# than the expression. For every candidate long enough to be compared it
+# also dumps the grammar expression and the tokens as a debugging aid.
+check_grammar_node_match(toks: array of ref TokNode, gn: ref GrammarNode): int {
+	want := gn.expr;
+	if (len want > len toks)
+		return 0;
+
+	# Debug dump: grammar being checked, then the tokens it is checked against.
+	gn.print_expr();
+	print_toks(toks);
+
+	for (k := 0; k < len want; k++)
+		if (toks[k].typ != want[k])
+			return 0;
+	return 1;
+}
+
+# Returns a new array equal to src with the replace_len elements starting
+# at replace_start removed and replace_with inserted in their place.
+# src and replace_with are not modified.
+#   src           - original tokens
+#   replace_start - index of the first element to drop
+#   replace_len   - number of elements to drop; a range running past the
+#                   end of src is clamped to the end
+#   replace_with  - elements to insert at replace_start
+# The result is filled in directly, so this function does not rely on the
+# Sh9Util handle, which nothing in this module loads.
+replace_toks(src: array of ref TokNode, replace_start: int, replace_len: int, replace_with: array of ref TokNode): array of ref TokNode {
+	src_len := len src;
+	with_len := len replace_with;
+
+	# First source index kept after the replaced range.
+	tail := replace_start + replace_len;
+	if (tail > src_len)
+		tail = src_len;
+
+	out := array[replace_start + with_len + (src_len - tail)] of ref TokNode;
+	n := 0;
+	for (i := 0; i < replace_start; i++)
+		out[n++] = src[i];
+	for (i = 0; i < with_len; i++)
+		out[n++] = replace_with[i];
+	for (i = tail; i < src_len; i++)
+		out[n++] = src[i];
+	return out;
+}
+
+# Repeatedly rewrites toks using the grammar g until no rule applies.
+# Each pass scans suffixes of toks from shortest to longest and, for each
+# suffix, tries the rules in order. On the first rule whose expression
+# matches the start of the suffix, the rule's callback is invoked with the
+# matched tokens and they are replaced by one token of type gj.transform
+# (empty text, position of the first matched token); then a new pass begins.
+# Progress is printed along the way.
+# Returns the token array left after the last pass.
+parse_toks(toks: array of ref TokNode, g: array of ref GrammarNode): array of ref TokNode {
+	# This module has no init function, so the Sys handle is loaded on
+	# first use; calling sys->print through a nil handle would fault.
+	if (sys == nil)
+		sys = load Sys Sys->PATH;
+
+	lgns := len g;
+	changed := 0;
+	ctr := 0;
+	do {
+		lt := len toks;
+		sys->print("Loop %d: ", ctr);
+		print_toks_short(toks);
+		ctr++;
+		changed = 0;
+		# Start at i = 1: the empty suffix (i = 0) can never match a
+		# non-empty expression.
+	fast:	for (i := 1; i <= lt; i++) {
+			at := lt - i;
+			for (j := 0; j < lgns; j++) {
+				gj := g[j];
+				n := len gj.expr;
+				# An empty expression would match everywhere and grow
+				# the token array forever; ignore such rules.
+				if (n == 0)
+					continue;
+				if (check_grammar_node_match(toks[at:], gj) != 1)
+					continue;
+
+				sys->print("Something matched !\n");
+				gj.print_expr();
+				sys->print("Before replace: ");
+				print_toks_short(toks);
+				gj.callback(toks[at:at+n]);
+				head := toks[at];
+				toks = replace_toks(toks, at, n, array[] of {ref mk_tok(head.start, head.line, "", gj.transform)});
+				sys->print("After replace: ");
+				print_toks_short(toks);
+				changed = 1;
+				break fast;
+			}
+		}
+	} while (changed);
+	return toks;
+}
diff --git a/sh9parser.m b/sh9parser.m
new file mode 100644
index 0000000..cbad2f6
--- /dev/null
+++ b/sh9parser.m
@@ -0,0 +1,28 @@
+# Interface of the sh9 parser module (implemented in sh9parser.b).
+Sh9Parser: module
+{
+# File name handed to "load"; note that it carries no directory component.
+PATH: con "sh9parser.dis";
+DESCR: con "Mostly generic parser for sh9";
+
+# Builds a TokNode from its four fields.
+mk_tok: fn(start: int, line: int, tok: string, typ: string) : TokNode;
+# Pushes last_tok onto toks when it has a type other than "UNK" and returns
+# a blank token in its place; otherwise returns both arguments unchanged.
+set_last_tok: fn(last_tok: ref TokNode, toks: list of ref TokNode): (TokNode, list of ref TokNode);
+# Print one line per token / all type tags on a single line.
+print_toks: fn(toks: array of ref TokNode);
+print_toks_short: fn(toks: array of ref TokNode);
+# Returns 1 if toks starts with the type sequence gn.expr, else 0.
+check_grammar_node_match: fn(toks: array of ref TokNode, gn: ref GrammarNode): int;
+# Returns a copy of src with replace_len elements at replace_start replaced
+# by replace_with.
+replace_toks: fn(src: array of ref TokNode, replace_start: int, replace_len: int, replace_with: array of ref TokNode): array of ref TokNode;
+# Applies the grammar g to toks repeatedly until no rule matches.
+parse_toks: fn(toks: array of ref TokNode, g: array of ref GrammarNode): array of ref TokNode;
+
+# One token: start and line give its position, tok is its text and typ is
+# its type tag.
+TokNode: adt {
+ start: int;
+ line: int;
+ tok: string;
+ typ: string;
+};
+
+# One grammar rule: expr is the sequence of type tags to match, transform
+# is the type tag of the token that replaces a match, and callback is
+# invoked with the matched tokens.
+GrammarNode: adt {
+ expr: array of string;
+ transform: string;
+
+ callback: ref fn(toks: array of ref TokNode);
+ print_expr: fn(gn: self ref GrammarNode);
+};
+};
diff --git a/sh9util.b b/sh9util.b
new file mode 100644
index 0000000..f74032e
--- /dev/null
+++ b/sh9util.b
@@ -0,0 +1,26 @@
+implement Sh9Util;
+
+include "sh9util.m";
+
+# Returns a new list holding the elements of toks in reverse order.
+# A nil list yields nil.
+reverse_list[T](toks: list of T): list of T
+{
+	out: list of T;
+	# Advance through the input on every step; without this the head is
+	# pushed repeatedly and the result is N copies of the first element.
+	for (; toks != nil; toks = tl toks)
+		out = (hd toks) :: out;
+	return out;
+}
+
+# Copies the elements of toks, in order, into a freshly allocated array
+# whose length equals the list's length.
+to_array[T](toks: list of T): array of T {
+	out := array[len toks] of T;
+	for (i := 0; i < len out; i++) {
+		out[i] = hd toks;
+		toks = tl toks;
+	}
+	return out;
+}
diff --git a/sh9util.m b/sh9util.m
new file mode 100644
index 0000000..aa44c3f
--- /dev/null
+++ b/sh9util.m
@@ -0,0 +1,8 @@
+# Interface of the sh9 utility module (implemented in sh9util.b).
+Sh9Util: module
+{
+# File name handed to "load"; note that it carries no directory component.
+PATH: con "sh9util.dis";
+DESCR: con "Utility functions for sh9";
+
+# Polymorphic list helpers: build a list from the elements of toks, and
+# copy a list into an array of the same length.
+reverse_list: fn[T](toks: list of T): list of T;
+to_array: fn[T](toks: list of T): array of T;
+};