summaryrefslogtreecommitdiff
path: root/appl/cmd/sh/regex.b
diff options
context:
space:
mode:
Diffstat (limited to 'appl/cmd/sh/regex.b')
-rw-r--r--appl/cmd/sh/regex.b220
1 files changed, 220 insertions, 0 deletions
diff --git a/appl/cmd/sh/regex.b b/appl/cmd/sh/regex.b
new file mode 100644
index 00000000..e761a8ba
--- /dev/null
+++ b/appl/cmd/sh/regex.b
@@ -0,0 +1,220 @@
+implement Shellbuiltin;
+
+include "sys.m";
+ sys: Sys;
+include "draw.m";
+include "sh.m";
+ sh: Sh;
+ Listnode, Context: import sh;
+ myself: Shellbuiltin;
+include "regex.m";
+ regex: Regex;
+
+initbuiltin(ctxt: ref Context, shmod: Sh): string
+{
+ sys = load Sys Sys->PATH;
+ sh = shmod;
+ myself = load Shellbuiltin "$self";
+ if (myself == nil)
+ ctxt.fail("bad module", sys->sprint("regex: cannot load self: %r"));
+ regex = load Regex Regex->PATH;
+ if (regex == nil)
+ ctxt.fail("bad module",
+ sys->sprint("regex: cannot load %s: %r", Regex->PATH));
+ ctxt.addbuiltin("match", myself);
+ ctxt.addsbuiltin("re", myself);
+ return nil;
+}
+
+getself(): Shellbuiltin
+{
+ return myself;
+}
+
+runbuiltin(ctxt: ref Context, nil: Sh,
+ argv: list of ref Listnode, nil: int): string
+{
+ case (hd argv).word {
+ "match" =>
+ return builtin_match(ctxt, argv);
+ }
+ return nil;
+}
+
+whatis(nil: ref Sh->Context, nil: Sh, nil: string, nil: int): string
+{
+ return nil;
+}
+
+runsbuiltin(ctxt: ref Context, nil: Sh,
+ argv: list of ref Listnode): list of ref Listnode
+{
+ name := (hd argv).word;
+ case name {
+ "re" =>
+ return sbuiltin_re(ctxt, argv);
+ }
+ return nil;
+}
+
+sbuiltin_re(ctxt: ref Context, argv: list of ref Listnode): list of ref Listnode
+{
+ if (tl argv == nil)
+ ctxt.fail("usage", "usage: re (g|v|s|sg|m|mg|M) arg...");
+ argv = tl argv;
+ w := (hd argv).word;
+ case w {
+ "g" or
+ "v" =>
+ return sbuiltin_sel(ctxt, argv, w == "v");
+ "s" or
+ "sg" =>
+ return sbuiltin_sub(ctxt, argv, w == "sg");
+ "m" =>
+ return sbuiltin_match(ctxt, argv, 0);
+ "mg" =>
+ return sbuiltin_gmatch(ctxt, argv);
+ "M" =>
+ return sbuiltin_match(ctxt, argv, 1);
+ * =>
+ ctxt.fail("usage", "usage: re (g|v|s|sg|m|mg|M) arg...");
+ return nil;
+ }
+}
+
+sbuiltin_match(ctxt: ref Context, argv: list of ref Listnode, aflag: int): list of ref Listnode
+{
+ if (len argv != 3)
+ ctxt.fail("usage", "usage: re " + (hd argv).word + " arg");
+ argv = tl argv;
+ re := getregex(ctxt, word(hd argv), aflag);
+ w := word(hd tl argv);
+ a := regex->execute(re, w);
+ if (a == nil)
+ return nil;
+ ret: list of ref Listnode;
+ for (i := len a - 1; i >= 0; i--)
+ ret = ref Listnode(nil, elem(a, i, w)) :: ret;
+ return ret;
+}
+
+sbuiltin_gmatch(ctxt: ref Context, argv: list of ref Listnode): list of ref Listnode
+{
+ if (len argv != 3)
+ ctxt.fail("usage", "usage: re mg arg");
+ argv = tl argv;
+ re := getregex(ctxt, word(hd argv), 0);
+ w := word(hd tl argv);
+ ret, nret: list of ref Listnode;
+ beg := 0;
+ while ((a := regex->executese(re, w, (beg, len w), beg == 0, 1)) != nil) {
+ (s, e) := a[0];
+ ret = ref Listnode(nil, w[s:e]) :: ret;
+ if (s == e)
+ break;
+ beg = e;
+ }
+ for (; ret != nil; ret = tl ret)
+ nret = hd ret :: nret;
+ return nret;
+}
+
+sbuiltin_sel(ctxt: ref Context, argv: list of ref Listnode, vflag: int): list of ref Listnode
+{
+ cmd := (hd argv).word;
+ argv = tl argv;
+ if (argv == nil)
+ ctxt.fail("usage", "usage: " + cmd + " regex [arg...]");
+ re := getregex(ctxt, word(hd argv), 0);
+ ret, nret: list of ref Listnode;
+ for (argv = tl argv; argv != nil; argv = tl argv)
+ if (vflag ^ (regex->execute(re, word(hd argv)) != nil))
+ ret = hd argv :: ret;
+ for (; ret != nil; ret = tl ret)
+ nret = hd ret :: nret;
+ return nret;
+}
+
+sbuiltin_sub(ctxt: ref Context, argv: list of ref Listnode, gflag: int): list of ref Listnode
+{
+ cmd := (hd argv).word;
+ argv = tl argv;
+ if (argv == nil || tl argv == nil)
+ ctxt.fail("usage", "usage: " + cmd + " regex subs [arg...]");
+ re := getregex(ctxt, word(hd argv), 1);
+ subs := word(hd tl argv);
+ ret, nret: list of ref Listnode;
+ for (argv = tl tl argv; argv != nil; argv = tl argv)
+ ret = ref Listnode(nil, substitute(word(hd argv), re, subs, gflag).t1) :: ret;
+ for (; ret != nil; ret = tl ret)
+ nret = hd ret :: nret;
+ return nret;
+}
+
+builtin_match(ctxt: ref Context, argv: list of ref Listnode): string
+{
+ if (tl argv == nil)
+ ctxt.fail("usage", "usage: match regexp [arg...]");
+ re := getregex(ctxt, word(hd tl argv), 0);
+ for (argv = tl tl argv; argv != nil; argv = tl argv)
+ if (regex->execute(re, word(hd argv)) == nil)
+ return "no match";
+ return nil;
+}
+
+substitute(w: string, re: Regex->Re, subs: string, gflag: int): (int, string)
+{
+ matched := 0;
+ s := "";
+ beg := 0;
+ do {
+ a := regex->executese(re, w, (beg, len w), beg == 0, 1);
+ if (a == nil)
+ break;
+ matched = 1;
+ s += w[beg:a[0].t0];
+ for (i := 0; i < len subs; i++) {
+ if (subs[i] != '\\' || i == len subs - 1)
+ s[len s] = subs[i];
+ else {
+ c := subs[++i];
+ if (c < '0' || c > '9')
+ s[len s] = c;
+ else
+ s += elem(a, c - '0', w);
+ }
+ }
+ beg = a[0].t1;
+ if (a[0].t0 == a[0].t1)
+ break;
+ } while (gflag && beg < len w);
+ return (matched, s + w[beg:]);
+}
+
+elem(a: array of (int, int), i: int, w: string): string
+{
+ if (i < 0 || i >= len a)
+ return nil; # XXX could raise failure here. (invalid backslash escape)
+ (s, e) := a[i];
+ if (s == -1)
+ return nil;
+ return w[s:e];
+}
+
+# XXX could do regex caching here if it was worth it.
+getregex(ctxt: ref Context, res: string, flag: int): Regex->Re
+{
+ (re, err) := regex->compile(res, flag);
+ if (re == nil)
+ ctxt.fail("bad regex", "regex: bad regex \"" + res + "\": " + err);
+ return re;
+}
+
+word(n: ref Listnode): string
+{
+ if (n.word != nil)
+ return n.word;
+ if (n.cmd != nil)
+ n.word = sh->cmd2string(n.cmd);
+ return n.word;
+}