From 70c48bade8529b4b5f762754db96982c8c95c31a Mon Sep 17 00:00:00 2001
From: "Konstantin Kirik (snegovick)"
Date: Sat, 13 Dec 2025 05:10:17 +0300
Subject: Add parser context

Move the ModProc, ModVar and ShModule adts out of sh92.b into the
Sh9Parser module and add ParserCtx, which holds a list of ShModule
and an add_module() method that prepends a newly named module.

Every GrammarNode now carries a ctx reference; parse_toks passes it to
the rule callback together with the matched tokens, and callbacks are
declared to return an array of tokens. A rule whose transform is S_NONE
removes the matched tokens from the stream; any other transform replaces
them with a single token of that type.

The tokenizer now emits S_COLON for ":" and S_DOLL (formerly S_DOL) for
"$". Rule construction moves from init() into mk_grammar(). Debug prints
in the parser and in init() are commented out.

---
Review notes (not part of the commit message):

 * Line structure and indentation of this patch were reconstructed from a
   whitespace-collapsed copy; hunk line counts match the @@ headers, but
   the exact position of blank context lines and the indentation are a
   best effort and should be checked against the repository.
 * The "for" header in the GrammarNode.print_expr context was garbled in
   the copy ("i<lg; i++) { sys->" had been dropped) and is restored.
 * Rules that were declared with a nil transform now name it explicitly:
   S_NONE for cmd_call_g, S_EXPR for var_sub_g, var_sub_curl_g and
   dqstr_expr_g. parse_toks only special-cases S_NONE, so a nil transform
   produced a replacement token with an empty type that no rule matches.
   The blob ids on the sh92.b index line were NOT recomputed.
 * dqstr_expr_g is still declared but not listed in the grammar array.

 appl/cmd/sh92.b      | 94 ++++++++++++++++++++++++++++++++--------------------
 appl/lib/sh9parser.b | 45 ++++++++++++++++---------
 module/sh9parser.m   | 24 +++++++++++++-
 3 files changed, 111 insertions(+), 52 deletions(-)

diff --git a/appl/cmd/sh92.b b/appl/cmd/sh92.b
index 9748da7..fe53ceb 100644
--- a/appl/cmd/sh92.b
+++ b/appl/cmd/sh92.b
@@ -13,22 +13,11 @@ Sh92: module {
 	init: fn(nil: ref Draw->Context, nil: list of string);
 };
 
-ModProc: adt {
-	name: string;
-	start: int;
-};
-
-ModVar: adt {
-	name: string;
-	val: string;
-};
-
-ShModule: adt {
-	global_vars: list of ref ModVar;
-	procs: list of ref ModProc;
-};
-
 GrammarNode: import sh9p;
+ModProc: import sh9p;
+ModVar: import sh9p;
+ShModule: import sh9p;
+ParserCtx: import sh9p;
 TokNode: import sh9p;
 mk_tok: import sh9p;
 set_last_tok: import sh9p;
@@ -39,10 +28,10 @@ reverse_list: import sh9u;
 to_array: import sh9u;
 
 S_UNKNOWN: con "UNK";
+S_NONE: con "NONE";
 S_ID: con "ID";
 S_STR: con "STR";
 S_EQ: con "EQ";
-S_DOL: con "DOL";
 S_COLON: con "COLON";
 S_SEMIC: con "SEMIC";
 S_LPAR: con "LPAR";
@@ -55,6 +44,7 @@ S_DQTE: con "DQTE";
 S_SQTE: con "SQTE";
 S_SP: con "SP";
 S_TAB: con "TAB";
+S_DOLL: con "DOLL";
 S_EOL: con "EOL";
 
 S_STMT: con "STMT";
@@ -118,9 +108,9 @@ tokenize(line: string, line_n: int): array of ref TokNode {
 				(last_tok, toks) = set_last_tok(last_tok, toks);
 				toks = mk_tok(i, line_n, ";", S_SEMIC) :: toks;
 			};
-			"$" => {
+			":" => {
 				(last_tok, toks) = set_last_tok(last_tok, toks);
-				toks = mk_tok(i, line_n, "$", S_DOL) :: toks;
+				toks = mk_tok(i, line_n, ":", S_COLON) :: toks;
 			};
 			"(" => {
 				(last_tok, toks) = set_last_tok(last_tok, toks);
@@ -138,6 +128,10 @@ tokenize(line: string, line_n: int): array of ref TokNode {
 				(last_tok, toks) = set_last_tok(last_tok, toks);
 				toks = mk_tok(i, line_n, "}", S_RCURLY) :: toks;
 			};
+			"$" => {
+				(last_tok, toks) = set_last_tok(last_tok, toks);
+				toks = mk_tok(i, line_n, "$", S_DOLL) :: toks;
+			};
 			"\"" => {
 				(last_tok, toks) = set_last_tok(last_tok, toks);
 				last_tok.start = i;
@@ -169,21 +163,54 @@ tokenize(line: string, line_n: int): array of ref TokNode {
 	return to_array(toks);
 }
 
-stmt_assign(toks: array of ref TokNode) {
+stmt_assign(c: ref ParserCtx, toks: array of ref TokNode): array of ref TokNode {
 	sys->print("ASSIGN STMT\n");
+	return array[0] of ref TokNode;
 }
 
-stmt_cmd_call(toks: array of ref TokNode) {
+stmt_cmd_call(c: ref ParserCtx, toks: array of ref TokNode): array of ref TokNode {
 	sys->print("CMD CALL\n");
+	return array[0] of ref TokNode;
 }
 
-empty(toks: array of ref TokNode) {
-	sys->print("EMPTY\n");
+empty(c: ref ParserCtx, toks: array of ref TokNode): array of ref TokNode {
+	return array[0] of ref TokNode;
 }
 
-Te: adt{
-	s: string;
-};
+var_sub_expr(c: ref ParserCtx, toks: array of ref TokNode): array of ref TokNode {
+	sys->print("VAR SUB\n");
+	return array[0] of ref TokNode;
+}
+
+
+mk_grammar(ctx: ref ParserCtx): array of ref GrammarNode
+{
+	semic_eol_g : GrammarNode = (array [] of {S_SEMIC, S_EOL}, S_EOL, empty, ctx);
+	assign_g_semic : GrammarNode = (array [] of {S_ID, S_EQ, S_EXPR, S_SEMIC}, S_NONE, stmt_assign, ctx);
+	assign_g_eol : GrammarNode = (array [] of {S_ID, S_EQ, S_EXPR, S_EOL}, S_NONE, stmt_assign, ctx);
+	sqstr_expr_g: GrammarNode = (array [] of {S_SQSTR}, S_EXPR, empty, ctx);
+	str_expr_g: GrammarNode = (array [] of {S_STR}, S_EXPR, empty, ctx);
+	expr_combinator_g: GrammarNode = (array [] of {S_EXPR, S_EXPR}, S_EXPR, empty, ctx);
+	cmd_call_g: GrammarNode = (array [] of {S_ID, S_EXPR, S_SEMIC}, S_NONE, stmt_cmd_call, ctx);
+
+	var_sub_g: GrammarNode = (array [] of {S_DOLL, S_ID}, S_EXPR, var_sub_expr, ctx);
+	var_sub_curl_g: GrammarNode = (array [] of {S_DOLL, S_LCURLY, S_ID, S_RCURLY}, S_EXPR, var_sub_expr, ctx);
+	dqstr_expr_g: GrammarNode = (array [] of {S_DQTE, S_EXPR, S_DQTE}, S_EXPR, empty, ctx);
+
+	grammar: array of ref GrammarNode;
+	grammar = array [] of {
+		ref semic_eol_g,
+		ref assign_g_semic,
+		ref assign_g_eol,
+		ref sqstr_expr_g,
+		ref str_expr_g,
+		ref cmd_call_g,
+		ref expr_combinator_g,
+		ref var_sub_g,
+		ref var_sub_curl_g,
+	};
+	return grammar;
+}
 
 init(ctxt: ref Draw->Context, argv: list of string) {
 	sys = load Sys Sys->PATH;
@@ -191,20 +218,15 @@ init(ctxt: ref Draw->Context, argv: list of string) {
 
 	sh9p = load Sh9Parser Sh9Parser->PATH;
 	sh9p->init();
-
-	assign_g_semic : GrammarNode = (array [] of {S_ID, S_EQ, S_EXPR, S_SEMIC}, S_UNKNOWN, stmt_assign);
-	assign_g_eol : GrammarNode = (array [] of {S_ID, S_EQ, S_EXPR, S_EOL}, S_UNKNOWN, stmt_assign);
-	sqstr_expr_g: GrammarNode = (array [] of {S_SQSTR}, S_EXPR, empty);
-	str_expr_g: GrammarNode = (array [] of {S_STR}, S_EXPR, empty);
-	cmd_call_g: GrammarNode = (array [] of {S_ID, S_EQ, S_EXPR, S_SEMIC}, S_UNKNOWN, stmt_cmd_call);
-	grammar: array of ref GrammarNode;
-	grammar = array [] of {ref assign_g_semic, ref assign_g_eol, ref sqstr_expr_g, ref str_expr_g, ref cmd_call_g};
+	pctx:= ref ParserCtx;
+	pctx.add_module("shell");
 
 	toks1 := tokenize("AB = 'smth \"test\" ';", 0);
-	print_toks(toks1);
-	sys->print("Parse\n");
+	#print_toks(toks1);
+	#sys->print("Parse\n");
+	grammar:= mk_grammar(pctx);
 	parse_toks(toks1, grammar);
-	sys->print("Parse done\n");
+	#sys->print("Parse done\n");
 
 	# toks2 := tokenize("echo \"smth \" \"test\";", 0);
 	# print_toks(toks2);
diff --git a/appl/lib/sh9parser.b b/appl/lib/sh9parser.b
index ac46b40..8df4cfc 100644
--- a/appl/lib/sh9parser.b
+++ b/appl/lib/sh9parser.b
@@ -6,13 +6,15 @@ include "sh9util.m";
 
 sys: Sys;
 S_UNKNOWN: con "UNK";
+S_NONE: con "NONE";
 
 sh9u: Sh9Util;
 
 reverse_list: import sh9u;
 to_array: import sh9u;
 
-GrammarNode.print_expr(gn: self ref GrammarNode) {
+GrammarNode.print_expr(gn: self ref GrammarNode)
+{
 	lg:= len gn.expr;
 	for (i:=0; i<lg; i++) {
 		sys->print("%s ", gn.expr[i]);
@@ -24,13 +26,21 @@ GrammarNode.print_expr(gn: self ref GrammarNode) {
 	}
 }
 
+ParserCtx.add_module(ctx: self ref ParserCtx, name: string)
+{
+	m:= ref ShModule;
+	m.name = name;
+	ctx.modules = m :: ctx.modules;
+}
+
 init()
 {
 	sys = load Sys Sys->PATH;
 	sh9u = load Sh9Util Sh9Util->PATH;
 }
 
-mk_tok(start: int, line: int, tok: string, typ: string) : ref TokNode {
+mk_tok(start: int, line: int, tok: string, typ: string) : ref TokNode
+{
 	tok_node: TokNode;
 	tok_node.start = start;
 	tok_node.line = line;
@@ -40,7 +50,7 @@ mk_tok(start: int, line: int, tok: string, typ: string) : ref TokNode {
 }
 
 set_last_tok(last_tok: ref TokNode, toks: list of ref TokNode): (ref TokNode, list of ref TokNode) {
-	sys->print("last_tok: %s\n", last_tok.typ);
+	#sys->print("last_tok: %s\n", last_tok.typ);
 	ret_tok: TokNode;
 	#ret_tok = *last_tok;
 	ret_tok.typ = last_tok.typ;
@@ -54,7 +64,7 @@ set_last_tok(last_tok: ref TokNode, toks: list of ref TokNode): (ref TokNode, li
 		ret_tok.tok = "";
 		ret_tok.line = -1;
 	}
-	sys->print("ret_tok: %s\n", ret_tok.typ);
+	#sys->print("ret_tok: %s\n", ret_tok.typ);
 	return (ref ret_tok, toks);
 }
 
@@ -82,9 +92,9 @@ check_grammar_node_match(toks: array of ref TokNode, gn: ref GrammarNode): int {
 		return 0;
 	}
 	#sys->print("Checking grammar ");
-	gn.print_expr();
+	#gn.print_expr();
 	#sys->print("Against ");
-	print_toks(toks);
+	#print_toks(toks);
 	for (i:= 0; i < lg; i ++) {
 		if (toks[i].typ != gn.expr[i]) {
 			return 0;
@@ -117,21 +127,26 @@ parse_toks(toks: array of ref TokNode, g: array of ref GrammarNode): array of re
 
 	do {
 		lt := len toks;
-		sys->print("Loop %d: ", ctr);
-		print_toks_short(toks);
+		#sys->print("Loop %d: ", ctr);
+		#print_toks_short(toks);
 		ctr ++;
 		changed = 0;
 		fast: for (i := 0; i <= lt; i ++) {
 			for (j := 0; j < lgns; j++) {
 				gj:= g[j];
 				if (check_grammar_node_match(toks[lt - i:], gj) == 1) {
-					sys->print("Something matched !\n");
-					gj.print_expr();
-					sys->print("Before replace: ");
-					print_toks_short(toks);
-					gj.callback(toks[lt-i: lt-i+len gj.expr]);
-					toks = replace_toks(toks, lt-i, len gj.expr, array[] of {mk_tok(toks[lt - i].start, toks[lt - i].line, "", gj.transform)});
-					sys->print("After replace: ");
+					#sys->print("Something matched !\n");
+					#gj.print_expr();
+					#sys->print("Before replace: ");
+					#print_toks_short(toks);
+
+					gj.callback(gj.ctx, toks[lt-i: lt-i+len gj.expr]);
+					if (gj.transform == S_NONE) {
+						toks = replace_toks(toks, lt-i, len gj.expr, array[0] of ref TokNode);
+					} else {
+						toks = replace_toks(toks, lt-i, len gj.expr, array[] of {mk_tok(toks[lt - i].start, toks[lt - i].line, "", gj.transform)});
+					}
+					#sys->print("After replace: ");
 					changed = 1;
 					break fast;
 				}
diff --git a/module/sh9parser.m b/module/sh9parser.m
index 0807105..22d3f09 100644
--- a/module/sh9parser.m
+++ b/module/sh9parser.m
@@ -19,11 +19,33 @@ TokNode: adt {
 		typ: string;
 	};
 
+	ModProc: adt {
+		name: string;
+		start: int;
+	};
+
+	ModVar: adt {
+		name: string;
+		val: string;
+	};
+
+	ShModule: adt {
+		name: string;
+		vars: list of ref ModVar;
+		procs: list of ref ModProc;
+	};
+
+	ParserCtx: adt {
+		modules: list of ref ShModule;
+		add_module: fn(ctx: self ref ParserCtx, name: string);
+	};
+
 	GrammarNode: adt {
 		expr: array of string;
 		transform: string;
 
-		callback: ref fn(toks: array of ref TokNode);
+		callback: ref fn(ctx: ref ParserCtx, toks: array of ref TokNode): array of ref TokNode;
+		ctx: ref ParserCtx;
 		print_expr: fn(gn: self ref GrammarNode);
 	};
 };
-- 
cgit v1.2.3