;;; SPDX-FileCopyrightText: 2025 Sergei Egorov ;;; SPDX-License-Identifier: MIT ;======================================================================================== ; ; Parser for Scheme string regular expressions (SSRE) ; ;======================================================================================== ; Helpers (define-syntax receive (syntax-rules () ((receive formals expression body ...) (call-with-values (lambda () expression) (lambda formals body ...))))) (define (andmap p l) (or (null? l) (and (p (car l)) (andmap p (cdr l))))) (define (ormap p l) (and (pair? l) (or (p (car l)) (ormap p (cdr l))))) (define (decimal c) (- (char->integer c) (char->integer #\0))) (define (fail s msg . args) (raise (list 'string-sre->sre s msg args))) (define opar #\() (define cpar #\)) (define obrk #\[) (define cbrk #\]) (define obrc #\{) (define cbrc #\}) (define (skip s) ;=> s (sans leading atmosphere) (let loop ((s s)) (cond ((null? s) s) ((char-whitespace? (car s)) (loop (cdr s))) ((eqv? (car s) #\#) (let lp ((s (cdr s))) (cond ((null? s) s) ((eqv? (car s) #\newline) (loop (cdr s))) (else (lp (cdr s)))))) (else s)))) (define (prefix? p s) (cond ((string? p) (prefix? (string->list p) s)) ((not s) #f) ((null? p) s) ((null? s) #f) ((and (procedure? (car p)) ((car p) (car s))) (prefix? (cdr p) (cdr s))) ((eqv? (car p) (car s)) (prefix? (cdr p) (cdr s))) (else #f))) ; Option flags are symbols from the {i, m, s, x, n, u} set (define o-set? memq) (define (o-add f o) (if (memq f o) o (cons f o))) (define (o-del f o) (cond ((null? o) o) ((eq? f (car o)) (o-del f (cdr o))) (else (cons (car o) (o-del f (cdr o)))))) (define (o-lookup n o) (cond ((null? o) #f) ((and (pair? (car o)) (eq? n (caar o))) (car o)) (else (o-lookup n (cdr o))))) (define (o-skip s o) (if (o-set? 'x o) (skip s) s)) (define (o-wrappers o0 o1) ;=> (w/x w/y ...) (let loop ((o o1) (wl '())) (cond ((and (o-set? 'u o0) (not (o-set? 'u o))) (loop (o-add 'u o) (cons 'w/ascii wl))) ((and (not (o-set? 'u o0)) (o-set? 'u o)) (loop (o-del 'u o) (cons 'w/unicode wl))) ((and (o-set? 'i o0) (not (o-set? 'i o))) (loop (o-add 'i o) (cons 'w/case wl))) ((and (not (o-set? 'i o0)) (o-set? 'i o)) (loop (o-del 'i o) (cons 'w/nocase wl))) (else wl)))) ; SRE consructors (define (e-e) '(:)) (define (e-bos o) (if (o-set? 'm o) 'bol 'bos)) (define (e-eos o) (if (o-set? 'm o) 'eol 'eos)) (define (e-dot o) (if (o-set? 's o) 'any 'nonl)) (define (e-char c) c) (define (e-cset cs) cs) (define (e-shortcut c s) (case c ((#\\ #\^ #\$ #\. #\| #\* #\+ #\? #\[ #\] #\( #\) #\{ #\}) c) ((#\b) '(or bow eow)) ((#\B) 'nwb) ((#\<) 'bow) ((#\>) 'eow) ((#\A) 'bos) ((#\z) 'eos) ((#\d) 'numeric) ((#\D) '(~ numeric)) ((#\s) 'space) ((#\S) '(~ space)) ((#\w) '(or alnum #\_)) ((#\W) '(~ (or alnum #\_))) ((#\X) 'grapheme) ((#\Z) '(: (? #\newline) eos)) ;NB: full regexps (else (cond ((or (char-whitespace? c) (eqv? c #\#)) c) ; for x mode ((char-numeric? c) (list 'backref (decimal c))) (else (fail s (string-append "non-supported escape: \\" (string c)))))))) (define (e-class-shortcut c s) (case c ((#\\ #\^ #\- #\[ #\]) c) ((#\d) 'numeric) ((#\D) '(~ numeric)) ((#\s) 'space) ((#\S) '(~ space)) ((#\w) '(or alnum #\_)) ((#\W) '(~ (or alnum #\_))) (else (cond ((or (char-whitespace? c) (eqv? c #\#)) c) (else (fail s (string-append "non-supported class escape: \\" (string c)))))))) (define (with-e wl e) (if (null? wl) e (list (car wl) (with-e (cdr wl) e)))) (define (or-e e1 e2) (if (and (pair? e1) (eqv? (car e1) 'or)) (append e1 (list e2)) (list 'or e1 e2))) (define (and-e e1 e2) (cond ((and (pair? e1) (eqv? (car e1) 'and)) (append e1 (list e2))) ((and (pair? e2) (eqv? (car e2) '~)) (list '- e1 (cadr e2))) (else (list 'and e1 e2)))) (define (diff-e e1 e2) (if (and (pair? e1) (eqv? (car e1) 'diff)) (append e1 (list e2)) (list '- e1 e2))) (define (range-e e1 e2) (list 'char-range e1 e2)) (define (inv-e e) (list '~ e)) (define (not-e e) (cond ((member e '((or bow eow) (or eow bow))) 'nwb) ((eq? e 'nwb) '(or bow eow)) ((and (pair? e) (eq? (car e) 'look-ahead)) (list 'neg-look-ahead (cadr e))) ((and (pair? e) (eq? (car e) 'neg-look-ahead)) (list 'look-ahead (cadr e))) ((and (pair? e) (eq? (car e) 'look-behind)) (list 'neg-look-behind (cadr e))) ((and (pair? e) (eq? (car e) 'neg-look-behind)) (list 'look-behind (cadr e))) (else (list 'neg-look-ahead e)))) (define (conc-e e1 e2) (if (and (pair? e1) (eqv? (car e1) ':)) (append e1 (list e2)) (list ': e1 e2))) (define (star-e e) (list '* e)) (define (plus-e e) (list '+ e)) (define (rept-e m n e) ; n can be #f (cond ((not n) (list '>= m e)) ((eqv? m n) (list '= m e)) (else (list '** m n e)))) ; NB: here we rely on a 'hidden feature' of the SRE specification: the second counter ; of the ** repeat can be #f (standing in for infinity); this extension is supported ; by Alex Shinn's reference implementation for both ** and **?, which makes it unnecessary ; to have nongreedy version of >= and/or duplicate repeated expression as a workaround ; If your SRE implementation does not support it, you may use (: e (*? e)) for +? and ; (: (**? m m e) (*? e)) for +=? if not for the fact that duplicated groups will not ; be counted properly (define (opt-e e) (if (pair? e) (case (car e) ((?) `(?? ,(cadr e))) ((*) `(*? ,(cadr e))) ((+) `(**? 1 #f ,(cadr e))) ; see note above ((=) `(**? ,(cadr e) ,(cadr e) ,(caddr e))) ((>=) `(**? ,(cadr e) #f ,(caddr e))) ; see note above ((**) `(**? ,@(cdr e))) (else `(? ,e))) (list '? e))) (define (pre-e e e1) (list e e1)) (define (group-e e) (list '$ e)) (define (ungroup-e e) (if (and (= (length e) 2) (eq? (car e) '$)) (cadr e) e)) (define (namegroup-e name e) (list '-> name e)) (define (backref-e n) (list 'backref n)) (define (lookahead-e e) (list 'look-ahead e)) (define (lookbehind-e e) (list 'look-behind e)) (define (cs-e) '(or)) (define (cs-char c) (list 'or c)) (define (cs-union cs1 cs2) (cond ((equal? cs1 '(or)) cs2) ((and (pair? cs2) (eq? (car cs2) 'or) (pair? cs1) (eq? (car cs1) 'or)) (append cs2 (cdr cs1))) ((and (pair? cs2) (eq? (car cs2) 'or)) (append cs2 (list cs1))) (else (list 'or cs1 cs2)))) (define (cs-complement cs) (list '~ cs)) (define (cs-range c1 c2) (list 'char-range c1 c2)) (define (cs-flatten cs) (if (and (= (length cs) 2) (eq? (car cs) 'or)) (cadr cs) cs)) ; PCRE-like notation parser (define (parse-re-spec src o) ;=> e, s (define (parse-body s o) ;=> e, s (let ((s0 (prefix? "(?" s))) (if (and s0 (pair? s0) (or (char-alphabetic? (car s0)) (eqv? (car s0) #\-))) (receive (s1 o1) (parse-re-options s0 o) (if (prefix? ")" s1) ; allow for more than one (?o*) leader (receive (e s2) (parse-body (o-skip (cdr s1) o1) o1) (values (with-e (o-wrappers o o1) e) s2)) (parse-alt (o-skip s o) o))) (parse-alt (o-skip s o) o)))) (define (parse-alt s o) ;=> e, s (receive (e s) (parse-seq s o) (let loop ((e e) (s (o-skip s o))) (cond ((or (null? s) (eqv? (car s) cpar)) (values e s)) ((char=? (car s) #\|) (receive (e1 s1) (parse-seq (cdr s) o) (loop (or-e e e1) (o-skip s1 o)))) (else (values e s)))))) (define (parse-seq s o) ;=> e, s (let ((s (o-skip s o))) (if (or (null? s) (eqv? (car s) cpar) (eqv? (car s) #\|)) (values (e-e) s) (receive (e s) (parse-quant s o) (let loop ((e e) (s (o-skip s o))) (if (or (null? s) (eqv? (car s) cpar) (eqv? (car s) #\|)) (values e s) (receive (e1 s1) (parse-quant s o) (loop (conc-e e e1) (o-skip s1 o))))))))) (define (set-start? s o) (and (pair? s) (eqv? (car s) obrc) (let ((s (o-skip (cdr s) o))) (and (pair? s) (not (char-numeric? (car s))))))) (define (parse-quant s o) ;=> e, s (receive (e s) (parse-prim s o) (let loop ((e e) (s (o-skip s o))) (if (or (null? s) (set-start? s o)) (values e s) (case (car s) ((#\*) (loop (star-e e) (o-skip (cdr s) o))) ((#\+) (loop (plus-e e) (o-skip (cdr s) o))) ((#\?) (loop (opt-e e) (o-skip (cdr s) o))) ((#\{) (receive (e s) (parse-repeat e (cdr s) o) (loop e (o-skip s o)))) (else (values e s))))))) (define (parse-repeat e s o) ;=> e, s (receive (m n s) (parse-re-repeat s o) (unless (prefix? "}" s) (fail s "missing }")) (values (rept-e m n e) (cdr s)))) (define (parse-prim s o) ;=> e, s (cond ((eqv? (car s) #\^) (values (e-bos o) (cdr s))) ((eqv? (car s) #\$) (values (e-eos o) (cdr s))) ((eqv? (car s) #\.) (values (e-dot o) (cdr s))) ((prefix? "(?=" s) (parse-lookaround (cdddr s) o lookahead-e #f)) ((prefix? "(?!" s) (parse-lookaround (cdddr s) o lookahead-e #t)) ((prefix? "(?<=" s) (parse-lookaround (cddddr s) o lookbehind-e #f)) ((prefix? "(?" s) (fail s "missing > after name")) (receive (e s) (parse-prim (cons opar (cdr s)) o) (values (namegroup-e name (ungroup-e e)) s)))) ((prefix? "(?" s) (receive (s o1) (parse-re-options (cddr s) o) (unless (prefix? ":" s) (fail s "missing : after option flags")) (receive (e s) (parse-prim (cons opar (cdr s)) o1) (values (with-e (o-wrappers o o1) (ungroup-e e)) s)))) ((eqv? (car s) opar) (receive (e s) (parse-alt (cdr s) o) (unless (prefix? ")" s) (fail s "missing )")) (values (if (o-set? 'n o) e (group-e e)) (cdr s)))) ((eqv? (car s) obrk) (receive (cs s) (parse-re-class (cdr s) o) (unless (prefix? "]" s) (fail s "missing ]")) (values (e-cset cs) (cdr s)))) ((eqv? (car s) obrc) (receive (t e s) (parse-re-set (cdr s) o) (unless (prefix? "}" s) (fail s "missing }")) (values e (cdr s)))) ((prefix? "\\p" s) (parse-p-name (cddr s) o #f)) ((prefix? "\\P" s) (parse-p-name (cddr s) o #t)) ((prefix? "\\k<" s) (receive (name s) (parse-word (cdddr s)) (unless (prefix? ">" s) (fail s "missing > after name")) (values (backref-e name) (cdr s)))) ((prefix? (list #\\ char-numeric? char-numeric?) s) (values (backref-e (+ (* (decimal (cadr s)) 10) (decimal (caddr s)))) (cdddr s))) ((and (eqv? (car s) #\\) (pair? (cdr s))) (values (e-shortcut (cadr s) s) (cddr s))) ((memv (car s) '(#\\ #\^ #\$ #\. #\| #\* #\+ #\? #\[ #\] #\( #\) #\{ #\})) (fail s (string-append "misplaced/unescaped punctuation char: " (string (car s))))) (else (values (e-char (car s)) (cdr s))))) (define (parse-lookaround s o e-fn neg?) (receive (e s) (parse-alt s o) (unless (prefix? ")" s) (fail s "missing )")) (if neg? (values (not-e (e-fn e)) (cdr s)) (values (e-fn e) (cdr s))))) (parse-body src o)) (define (parse-word s) ;=> word, s (let loop ((s s) (l '())) (cond ((or (null? s) (not (or (char-alphabetic? (car s)) (eqv? (car s) #\_)))) (when (null? l) (fail s "empty name")) (values (string->symbol (list->string (reverse l))) s)) (else (loop (cdr s) (cons (car s) l)))))) (define (parse-re-options s o) ;=> s, o (let loop ((s s) (o o) (minus #f)) (cond ((or (null? s) (eqv? (car s) #\:) (eqv? (car s) cpar)) (values s o)) ((eqv? (car s) #\i) (loop (cdr s) (if minus (o-del 'i o) (o-add 'i o)) minus)) ((eqv? (car s) #\m) (loop (cdr s) (if minus (o-del 'm o) (o-add 'm o)) minus)) ((eqv? (car s) #\s) (loop (cdr s) (if minus (o-del 's o) (o-add 's o)) minus)) ((eqv? (car s) #\x) (loop (cdr s) (if minus (o-del 'x o) (o-add 'x o)) minus)) ((eqv? (car s) #\n) (loop (cdr s) (if minus (o-del 'n o) (o-add 'n o)) minus)) ((eqv? (car s) #\u) (loop (cdr s) (if minus (o-del 'u o) (o-add 'u o)) minus)) ((and (eqv? (car s) #\-) minus) (fail s "extra - in optons")) ((eqv? (car s) #\-) (loop (cdr s) o #t)) (else (fail s "unsupported option flag:" (car s)))))) (define (parse-re-repeat src o) ;=> m, n, src (define (nonrepeat-char? c) (and (not (char-numeric? c)) (not (eqv? c #\,)))) (let loop ((s (o-skip src o)) (m #f) (n #f) (comma #f)) (cond ((let ((s (o-skip s o))) (or (null? s) (nonrepeat-char? (car s)))) (when (not m) (fail s "missing range start in repeat")) (if comma (values m n (o-skip s o)) (values m m (o-skip s o)))) ((let ((s (o-skip s o))) (and (pair? s) (eqv? (car s) #\,))) (when comma (fail s "extra comma in repeat")) (when (not m) (fail s "missing range start in repeat")) (loop (o-skip (cdr (o-skip s o)) o) m n #t)) ((and comma (char-numeric? (car s))) (loop (cdr s) m (let ((d (decimal (car s)))) (if n (+ (* n 10) d) d)) #t)) ((char-numeric? (car s)) (loop (cdr s) (let ((d (decimal (car s)))) (if m (+ (* m 10) d) d)) n #f)) (else (fail s "unexpected char in {m,n} repeat spec:" (car s)))))) (define (parse-p-name s o inv?) ;=> cs, src (cond ((prefix? "{" s) (receive (w s1) (parse-word (cdr s)) (unless (prefix? "}" s1) (fail s1 "missing } after name")) (receive (t e) (ref-named-expr w o (cdr s)) (unless (eq? t 'cset) (fail (cdr s) (string-append "unknown named charset: " (symbol->string w)))) (values (if inv? (inv-e e) e) (cdr s1))))) ((and (pair? s) (or (char-alphabetic? (car s)) (eqv? (car s) #\_))) (let ((w (string->symbol (string (car s))))) (receive (t e) (ref-named-expr w o s) (unless (eq? t 'cset) (fail s (string-append "unknown named charset: " (symbol->string w)))) (values (if inv? (inv-e e) e) (cdr s))))) (else (fail s (string-append "missing charset name"))))) (define (parse-re-class src o) ;=> cs, src (define (range-rhs? s) (and (pair? s) (eqv? (car s) #\-) (pair? (cdr s)) (not (eqv? (cadr s) cbrk)))) (define (invalid-range s) (fail s "invalid range")) (define (parse-char s otherwise) ;=> c, s | (otherwise) (cond ((prefix? (list #\[ #\. char? #\. #\]) s) => (lambda (s1) (values (caddr s) s1))) ((prefix? "[=" s) (fail s "collation sequences not supported")) ((prefix? "[:" s) (otherwise s)) ((and (pair? s) (not (eqv? (car s) cbrk)) (not (eqv? (car s) #\\))) (values (car s) (cdr s))) ((and (pair? s) (eqv? (car s) #\\) (pair? (cdr s)) (memv (cadr s) '(#\\ #\^ #\- #\[ #\]))) (values (cadr s) (cddr s))) (else (otherwise s)))) (define (parse-class-element s) ;=> cs, s (cond ((prefix? "[:" s) (receive (w s1) (parse-word (cddr s)) (unless (prefix? ":]" s1) (fail s1 ":] expected")) (receive (t e) (ref-named-expr w o s) (unless (eq? t 'cset) (fail s (string-append "unknown named charset: " (symbol->string w)))) (values e (cddr s1))))) ((prefix? "\\p" s) (parse-p-name (cddr s) o #f)) ((prefix? "\\P" s) (parse-p-name (cddr s) o #t)) ((and (pair? s) (eqv? (car s) #\\)) (unless (pair? (cdr s)) (fail s "incomplete class shortcut")) (values (e-class-shortcut (cadr s) s) (cddr s))) (else (fail s "char or char class expected")))) (define (parse-element s) ;=> cs, s (receive (cs s) (parse-char s parse-class-element) (cond ((range-rhs? s) (unless (char? cs) (invalid-range s)) (receive (cs2 s) (parse-char (cdr s) invalid-range) (values (cs-range cs cs2) s))) (else (values cs s))))) (define (parse-elements cs s) ;=> cs, s (if (or (null? s) (eqv? (car s) cbrk)) (values (cs-flatten cs) s) (receive (cs1 s) (parse-element s) (parse-elements (cs-union cs1 cs) s)))) (define (parse-body s) (if (and (pair? s) (eqv? (car s) cbrk)) (parse-elements (cs-union (car s) (cs-e)) (cdr s)) (parse-elements (cs-e) s))) (if (and (pair? src) (eqv? (car src) #\^)) (receive (cs src) (parse-body (cdr src)) (values (cs-complement cs) src)) (parse-body src))) (define (parse-re-set src o) ;=> t, e, s (define (check-cset t e s op) (unless (eq? t 'cset) (fail s (string-append op " applied no non-cset argument") e s))) (define (check-bcnd t e s op) (unless (eq? t 'bcnd) (fail s (string-append op " applied no non-bcnd argument") e s))) (define (parse-or s o) ;=> t, e, s (receive (t e s) (parse-in s o) (let loop ((t t) (e e) (s (o-skip s o))) (cond ((or (null? s) (eqv? (car s) cbrc)) (values t e s)) ((char=? (car s) #\|) (receive (t1 e1 s1) (parse-in (cdr s) o) (let ((t (if (eq? t t1) t 'expr))) (loop t (or-e e e1) (o-skip s1 o))))) (else (values t e s)))))) (define (parse-in s o) ;=> t, e, s (receive (t e s) (parse-pre s o) (let loop ((t t) (e e) (s (o-skip s o))) (cond ((and (pair? s) (char=? (car s) #\&)) (check-cset t e s "&") (receive (t1 e1 s1) (parse-pre (cdr s) o) (check-cset t1 e1 s1 "&") (loop t (and-e e e1) (o-skip s1 o)))) ((and (pair? s) (char=? (car s) #\-)) (check-cset t e s "-") (receive (t1 e1 s1) (parse-pre (cdr s) o) (check-cset t1 e1 s1 "-") (loop t (diff-e e e1) (o-skip s1 o)))) (else (values t e s)))))) (define (parse-pre s o) ;=> t, e, s (let loop ((s (o-skip s o))) (cond ((and (pair? s) (eqv? (car s) #\~)) (receive (t e s1) (loop (o-skip (cdr s) o)) (check-cset t e (cdr s) "~") (values 'cset (inv-e e) s1))) ((and (pair? s) (eqv? (car s) #\!)) (receive (t e s1) (loop (o-skip (cdr s) o)) (check-bcnd t e (cdr s) "!") (values 'bcnd (not-e e) s1))) ((and (pair? s) (eqv? (car s) obrc)) (receive (t e s) (parse-re-set (cdr s) o) (when (or (null? s) (not (eqv? (car s) cbrc))) (fail s "missing }")) (values t e (cdr s)))) ((and (pair? s) (eqv? (car s) obrk)) (receive (cs s) (parse-re-class (cdr s) o) (when (or (null? s) (not (eqv? (car s) cbrk))) (fail s "missing )")) (values 'cset (e-cset cs) (cdr s)))) (else (parse-prim s o))))) (define (parse-prim s0 o) ;=> t, e, s (define (name-char? c) (or (char-alphabetic? c) (eqv? c #\_) (eqv? c #\<) (eqv? c #\>))) (if (or (eqv? (car s0) #\^) (eqv? (car s0) #\/)) (let ((name (string->symbol (string (car s0))))) (receive (t e) (ref-named-expr name o s0) (values t e (cdr s0)))) (let loop ((s s0) (l '())) (cond ((and (pair? s) (name-char? (car s))) (loop (cdr s) (cons (car s) l))) ((pair? l) (let ((name (string->symbol (list->string (reverse l))))) (receive (t e) (ref-named-expr name o s0) (values t e s)))) (else (fail s0 "name expected")))))) (receive (t e s) (parse-or (o-skip src o) o) (values t e (o-skip s o)))) (define named-exprs '( (/ cset #\\) (^ cset #\^) (any cset any) (_ cset any) (digit cset numeric) (n cset numeric) (d cset numeric) (lower cset lower) (l cset lower) (upper cset upper) (u cset upper) (alpha cset alpha) (a cset alpha) (alnum cset alnum) (an cset alnum) (xdigit cset xdigit) (x cset xdigit) (cntrl cset cntrl) (c cset cntrl) (punct cset punct) (p cset punct) (graph cset graph) (g cset graph) (symbol cset symbol) (y cset symbol) (print cset print) (gs cset print) (blank cset (or #\space #\tab)) (h cset (or #\space #\tab)) (space cset space) (s cset space) (w cset (or alnum #\_)) (v cset (- space (or #\space #\tab))) (bos bcnd bos) ( bcnd eos) (bol bcnd bol) ( bcnd eol) (bow bcnd bow) ( bcnd eow) (> bcnd eow) (bog bcnd bog) ( bcnd eog) (wb bcnd (or bow eow)) (b bcnd (or bow eow)) (nwb bcnd nwb) ( expr word) ( expr grapheme) (X expr grapheme) )) (define (ref-named-expr name o s) ;=> type, sre (cond ((o-lookup name o) => (lambda (p) (values (cadr p) (caddr p)))) (else (fail s (string-append "name not defined: " (symbol->string name)))))) ; definitions are wrapped into a ds structure with 2 extra slots to contain cached data; ; cache #1 is for string-sre->sre, cache #2 for string-sre->regexp (define (make-ds nes) (vector nes '() '())) (define (ds-nes ds) (vector-ref ds 0)) (define ds-cache vector-ref) ; index is 1 or 2 (define set-ds-cache! vector-set!) ; ditto ; simple cache for 16 most recently used ssre strings (define (cache-slot ds cno str) (define rcl (ds-cache ds cno)) (let loop ((cl rcl) (prevcl #f) (n 0)) (if (pair? cl) (if (string=? (caar cl) str) (car cl) (loop (cdr cl) cl (+ n 1))) (let ((cs (cons str #f))) (when (>= n 16) (set-cdr! prevcl '())) (set-ds-cache! ds cno (cons cs rcl)) cs)))) ; NB: We assume that parameters follow the protocol described in SRFI-39, namely that ; a parameter procedure can be called with a value argument to set the parameter globally. ; This behavior is not required by R7RS. (define string-sre-definitions (make-parameter (make-ds named-exprs))) (define (string-sre-bind n t e ds) (make-ds (cons (list n t e) (ds-nes ds)))) (define (string-sre-unbind n ds) (define (unbind n nes) (cond ((null? nes) nes) ((and (pair? nes) (pair? (car nes)) (eq? (caar nes) n)) (unbind n (cdr nes))) (else (cons (car nes) (string-sre-unbind n (cdr nes)))))) (make-ds (unbind n (ds-nes ds)))) (define (ssre-fancy-error str src msg args) (define p (- (string-length str) (length src))) (define m (string-append "string-sre->sre: " msg)) (when (>= p 0) ; todo: what if str is multi-line? pick p line only! (set! m (string-append m "\n" str "\n" (make-string p #\space) "^"))) (apply error m args)) (define (string-sre-syntax-error? x) (and (list? x) (= (length x) 4) (eq? (car x) 'string-sre->sre) (string? (cadr x)) (string? (caddr x)) (list? (cadddr x)))) (define (string-sre->sre str) (define ds (string-sre-definitions)) (define cs (cache-slot ds 1 str)) ; cache #1 is for string-sre->sre (or (cdr cs) (guard (x ((string-sre-syntax-error? x) (apply ssre-fancy-error str (cdr x)))) (receive (e s) (parse-re-spec (string->list str) (cons 'u (ds-nes ds))) (when (pair? s) (fail s (string-append "unexpected terminator char: " (string (car s))))) (set-cdr! cs e) e)))) (define (string-sre->regexp str) (define ds (string-sre-definitions)) (define cs (cache-slot ds 2 str)) ; cache #2 is for string-sre->regexp (or (cdr cs) (guard (x ((string-sre-syntax-error? x) (apply ssre-fancy-error str (cdr x)))) (receive (e s) (parse-re-spec (string->list str) (cons 'u (ds-nes ds))) (when (pair? s) (fail s (string-append "unexpected terminator char: " (string (car s))))) (let ((re (regexp e))) (set-cdr! cs re) re)))))