move things around. the parser no longer adjusts nodes based on options, instead opting for nfa to handle it (case-insensitive, multiline, dotall, reversed..)
parent
e4a2b671d3
commit
384a11ecee
|
@ -5,12 +5,13 @@ IN: regexp.backend
|
|||
|
||||
TUPLE: regexp
|
||||
raw
|
||||
{ stack vector }
|
||||
parse-tree
|
||||
{ options hashtable }
|
||||
stack
|
||||
parse-tree
|
||||
nfa-table
|
||||
dfa-table
|
||||
minimized-table
|
||||
matchers
|
||||
{ nfa-traversal-flags hashtable }
|
||||
{ dfa-traversal-flags hashtable }
|
||||
{ state integer }
|
||||
|
|
|
@ -1,12 +1,25 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel math math.order symbols regexp.parser
|
||||
USING: accessors kernel math math.order symbols
|
||||
words regexp.utils unicode.categories combinators.short-circuit ;
|
||||
IN: regexp.classes
|
||||
|
||||
SINGLETONS: any-char any-char-no-nl
|
||||
letter-class LETTER-class Letter-class digit-class
|
||||
alpha-class non-newline-blank-class
|
||||
ascii-class punctuation-class java-printable-class blank-class
|
||||
control-character-class hex-digit-class java-blank-class c-identifier-class
|
||||
unmatchable-class terminator-class word-boundary-class ;
|
||||
|
||||
SINGLETONS: beginning-of-input beginning-of-line
|
||||
end-of-input end-of-line ;
|
||||
|
||||
MIXIN: node
|
||||
TUPLE: character-class-range from to ; INSTANCE: character-class-range node
|
||||
|
||||
GENERIC: class-member? ( obj class -- ? )
|
||||
|
||||
M: word class-member? ( obj class -- ? ) 2drop f ;
|
||||
M: t class-member? ( obj class -- ? ) 2drop f ;
|
||||
|
||||
M: integer class-member? ( obj class -- ? ) 2drop f ;
|
||||
|
||||
|
@ -18,7 +31,7 @@ M: any-char class-member? ( obj class -- ? )
|
|||
|
||||
M: any-char-no-nl class-member? ( obj class -- ? )
|
||||
drop CHAR: \n = not ;
|
||||
|
||||
|
||||
M: letter-class class-member? ( obj class -- ? )
|
||||
drop letter? ;
|
||||
|
||||
|
@ -70,3 +83,9 @@ M: terminator-class class-member? ( obj class -- ? )
|
|||
[ CHAR: \u002028 = ]
|
||||
[ CHAR: \u002029 = ]
|
||||
} 1|| ;
|
||||
|
||||
M: beginning-of-line class-member? ( obj class -- ? )
|
||||
2drop f ;
|
||||
|
||||
M: end-of-line class-member? ( obj class -- ? )
|
||||
2drop f ;
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
USING: accessors arrays assocs grouping kernel regexp.backend
|
||||
locals math namespaces regexp.parser sequences fry quotations
|
||||
math.order math.ranges vectors unicode.categories regexp.utils
|
||||
regexp.transition-tables words sets ;
|
||||
regexp.transition-tables words sets regexp.classes unicode.case ;
|
||||
IN: regexp.nfa
|
||||
|
||||
SYMBOL: negation-mode
|
||||
|
@ -22,8 +22,13 @@ SINGLETON: front-anchor INSTANCE: front-anchor traversal-flag
|
|||
SINGLETON: back-anchor INSTANCE: back-anchor traversal-flag
|
||||
SINGLETON: word-boundary INSTANCE: word-boundary traversal-flag
|
||||
|
||||
: add-global-flag ( flag -- )
|
||||
current-regexp get nfa-table>> flags>> conjoin ;
|
||||
: options ( -- obj ) current-regexp get options>> ;
|
||||
|
||||
: option? ( obj -- ? ) options key? ;
|
||||
|
||||
: option-on ( obj -- ) options conjoin ;
|
||||
|
||||
: option-off ( obj -- ) options delete-at ;
|
||||
|
||||
: next-state ( regexp -- state )
|
||||
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
|
||||
|
@ -106,6 +111,7 @@ M: kleene-star nfa-node ( node -- )
|
|||
|
||||
M: concatenation nfa-node ( node -- )
|
||||
seq>>
|
||||
reversed-regexp option? [ <reversed> ] when
|
||||
[ [ nfa-node ] each ]
|
||||
[ length 1- [ concatenate-nodes ] times ] bi ;
|
||||
|
||||
|
@ -115,16 +121,59 @@ M: alternation nfa-node ( node -- )
|
|||
[ length 1- [ alternate-nodes ] times ] bi ;
|
||||
|
||||
M: constant nfa-node ( node -- )
|
||||
char>> literal-transition add-simple-entry ;
|
||||
case-insensitive option? [
|
||||
dup char>> [ ch>lower ] [ ch>upper ] bi
|
||||
2dup = [
|
||||
2drop
|
||||
char>> literal-transition add-simple-entry
|
||||
] [
|
||||
[ literal-transition add-simple-entry ] bi@
|
||||
alternate-nodes drop
|
||||
] if
|
||||
] [
|
||||
char>> literal-transition add-simple-entry
|
||||
] if ;
|
||||
|
||||
M: epsilon nfa-node ( node -- )
|
||||
drop eps literal-transition add-simple-entry ;
|
||||
|
||||
M: word nfa-node ( node -- )
|
||||
M: word nfa-node ( node -- ) class-transition add-simple-entry ;
|
||||
|
||||
M: any-char nfa-node ( node -- )
|
||||
[ dotall option? ] dip any-char-no-nl ?
|
||||
class-transition add-simple-entry ;
|
||||
|
||||
! M: beginning-of-text nfa-node ( node -- ) ;
|
||||
|
||||
M: beginning-of-line nfa-node ( node -- ) class-transition add-simple-entry ;
|
||||
|
||||
M: end-of-line nfa-node ( node -- ) class-transition add-simple-entry ;
|
||||
|
||||
: choose-letter-class ( node -- node' )
|
||||
case-insensitive option? Letter-class rot ? ;
|
||||
|
||||
M: letter-class nfa-node ( node -- )
|
||||
choose-letter-class class-transition add-simple-entry ;
|
||||
|
||||
M: LETTER-class nfa-node ( node -- )
|
||||
choose-letter-class class-transition add-simple-entry ;
|
||||
|
||||
M: character-class-range nfa-node ( node -- )
|
||||
class-transition add-simple-entry ;
|
||||
case-insensitive option? [
|
||||
dup [ from>> ] [ to>> ] bi
|
||||
2dup [ Letter? ] bi@ and [
|
||||
rot drop
|
||||
[ [ ch>lower ] bi@ character-class-range boa ]
|
||||
[ [ ch>upper ] bi@ character-class-range boa ] 2bi
|
||||
[ class-transition add-simple-entry ] bi@
|
||||
alternate-nodes
|
||||
] [
|
||||
2drop
|
||||
class-transition add-simple-entry
|
||||
] if
|
||||
] [
|
||||
class-transition add-simple-entry
|
||||
] if ;
|
||||
|
||||
M: capture-group nfa-node ( node -- )
|
||||
eps literal-transition add-simple-entry
|
||||
|
@ -141,26 +190,6 @@ M: non-capture-group nfa-node ( node -- )
|
|||
M: reluctant-kleene-star nfa-node ( node -- )
|
||||
term>> <kleene-star> nfa-node ;
|
||||
|
||||
M: beginning-of-line nfa-node ( node -- )
|
||||
drop
|
||||
eps literal-transition add-simple-entry
|
||||
beginning-of-line add-global-flag ;
|
||||
|
||||
M: end-of-line nfa-node ( node -- )
|
||||
drop
|
||||
eps literal-transition add-simple-entry
|
||||
end-of-line add-global-flag ;
|
||||
|
||||
M: beginning-of-input nfa-node ( node -- )
|
||||
drop
|
||||
eps literal-transition add-simple-entry
|
||||
beginning-of-input add-global-flag ;
|
||||
|
||||
M: end-of-input nfa-node ( node -- )
|
||||
drop
|
||||
eps literal-transition add-simple-entry
|
||||
end-of-input add-global-flag ;
|
||||
|
||||
M: negation nfa-node ( node -- )
|
||||
negation-mode inc
|
||||
term>> nfa-node
|
||||
|
@ -182,6 +211,10 @@ M: lookbehind nfa-node ( node -- )
|
|||
lookbehind-off add-traversal-flag
|
||||
2 [ concatenate-nodes ] times ;
|
||||
|
||||
M: option nfa-node ( node -- )
|
||||
[ option>> ] [ on?>> ] bi [ option-on ] [ option-off ] if
|
||||
eps literal-transition add-simple-entry ;
|
||||
|
||||
: construct-nfa ( regexp -- )
|
||||
[
|
||||
reset-regexp
|
||||
|
|
|
@ -19,8 +19,8 @@ IN: regexp.parser
|
|||
[ ] [ "(?:a)" test-regexp ] unit-test
|
||||
[ ] [ "(?i:a)" test-regexp ] unit-test
|
||||
[ ] [ "(?-i:a)" test-regexp ] unit-test
|
||||
[ "(?z:a)" test-regexp ] [ bad-option? ] must-fail-with
|
||||
[ "(?-z:a)" test-regexp ] [ bad-option? ] must-fail-with
|
||||
[ "(?z:a)" test-regexp ] [ unknown-regexp-option? ] must-fail-with
|
||||
[ "(?-z:a)" test-regexp ] [ unknown-regexp-option? ] must-fail-with
|
||||
|
||||
[ ] [ "(?=a)" test-regexp ] unit-test
|
||||
|
||||
|
|
|
@ -4,12 +4,11 @@ USING: accessors arrays assocs combinators io io.streams.string
|
|||
kernel math math.parser namespaces qualified sets
|
||||
quotations sequences splitting symbols vectors math.order
|
||||
unicode.categories strings regexp.backend regexp.utils
|
||||
unicode.case words locals ;
|
||||
unicode.case words locals regexp.classes ;
|
||||
IN: regexp.parser
|
||||
|
||||
FROM: math.ranges => [a,b] ;
|
||||
|
||||
MIXIN: node
|
||||
TUPLE: concatenation seq ; INSTANCE: concatenation node
|
||||
TUPLE: alternation seq ; INSTANCE: alternation node
|
||||
TUPLE: kleene-star term ; INSTANCE: kleene-star node
|
||||
|
@ -40,38 +39,31 @@ INSTANCE: independent-group parentheses-group
|
|||
TUPLE: comment-group term ; INSTANCE: comment-group node
|
||||
INSTANCE: comment-group parentheses-group
|
||||
|
||||
TUPLE: character-class-range from to ; INSTANCE: character-class-range node
|
||||
SINGLETON: epsilon INSTANCE: epsilon node
|
||||
SINGLETON: any-char INSTANCE: any-char node
|
||||
SINGLETON: any-char-no-nl INSTANCE: any-char-no-nl node
|
||||
SINGLETON: beginning-of-input INSTANCE: beginning-of-input node
|
||||
SINGLETON: end-of-input INSTANCE: end-of-input node
|
||||
SINGLETON: beginning-of-line INSTANCE: beginning-of-line node
|
||||
SINGLETON: end-of-line INSTANCE: end-of-line node
|
||||
|
||||
TUPLE: option-on option ; INSTANCE: option-on node
|
||||
TUPLE: option-off option ; INSTANCE: option-off node
|
||||
TUPLE: option option on? ; INSTANCE: option node
|
||||
|
||||
SINGLETONS: unix-lines dotall multiline comments case-insensitive
|
||||
unicode-case reversed-regexp ;
|
||||
|
||||
SINGLETONS: letter-class LETTER-class Letter-class digit-class
|
||||
alpha-class non-newline-blank-class
|
||||
ascii-class punctuation-class java-printable-class blank-class
|
||||
control-character-class hex-digit-class java-blank-class c-identifier-class
|
||||
unmatchable-class terminator-class word-boundary-class ;
|
||||
|
||||
SINGLETONS: beginning-of-group end-of-group
|
||||
beginning-of-character-class end-of-character-class
|
||||
SINGLETONS: beginning-of-character-class end-of-character-class
|
||||
left-parenthesis pipe caret dash ;
|
||||
|
||||
: get-option ( option -- ? ) current-regexp get options>> at ;
|
||||
: get-unix-lines ( -- ? ) unix-lines get-option ;
|
||||
: get-dotall ( -- ? ) dotall get-option ;
|
||||
: get-multiline ( -- ? ) multiline get-option ;
|
||||
: get-comments ( -- ? ) comments get-option ;
|
||||
: get-case-insensitive ( -- ? ) case-insensitive get-option ;
|
||||
: get-unicode-case ( -- ? ) unicode-case get-option ;
|
||||
: get-reversed-regexp ( -- ? ) reversed-regexp get-option ;
|
||||
: push1 ( obj -- ) input-stream get stream>> push ;
|
||||
: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
|
||||
: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ;
|
||||
: drop1 ( -- ) read1 drop ;
|
||||
|
||||
: stack ( -- obj ) current-regexp get stack>> ;
|
||||
: change-whole-stack ( quot -- )
|
||||
current-regexp get
|
||||
[ stack>> swap call ] keep (>>stack) ; inline
|
||||
: push-stack ( obj -- ) stack push ;
|
||||
: pop-stack ( -- obj ) stack pop ;
|
||||
: cut-out ( vector n -- vector' vector ) cut rest ;
|
||||
ERROR: cut-stack-error ;
|
||||
: cut-stack ( obj vector -- vector' vector )
|
||||
tuck last-index [ cut-stack-error ] unless* cut-out swap ;
|
||||
|
||||
: <possessive-kleene-star> ( obj -- kleene ) possessive-kleene-star boa ;
|
||||
: <reluctant-kleene-star> ( obj -- kleene ) reluctant-kleene-star boa ;
|
||||
|
@ -80,18 +72,11 @@ left-parenthesis pipe caret dash ;
|
|||
|
||||
: <negation> ( obj -- negation ) negation boa ;
|
||||
: <concatenation> ( seq -- concatenation )
|
||||
>vector get-reversed-regexp [ reverse ] when
|
||||
[ epsilon ] [ concatenation boa ] if-empty ;
|
||||
>vector [ epsilon ] [ concatenation boa ] if-empty ;
|
||||
: <alternation> ( seq -- alternation ) >vector alternation boa ;
|
||||
: <capture-group> ( obj -- capture-group ) capture-group boa ;
|
||||
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
|
||||
: <constant> ( obj -- constant )
|
||||
dup Letter? get-case-insensitive and [
|
||||
[ ch>lower ] [ ch>upper ] bi
|
||||
[ constant boa ] bi@ 2array <alternation>
|
||||
] [
|
||||
constant boa
|
||||
] if ;
|
||||
: <constant> ( obj -- constant ) constant boa ;
|
||||
|
||||
: first|concatenation ( seq -- first/concatenation )
|
||||
dup length 1 = [ first ] [ <concatenation> ] if ;
|
||||
|
@ -100,21 +85,14 @@ left-parenthesis pipe caret dash ;
|
|||
dup length 1 = [ first ] [ <alternation> ] if ;
|
||||
|
||||
: <character-class-range> ( from to -- obj )
|
||||
2dup [ Letter? ] bi@ or get-case-insensitive and [
|
||||
[ [ ch>lower ] bi@ character-class-range boa ]
|
||||
[ [ ch>upper ] bi@ character-class-range boa ] 2bi
|
||||
2array [ [ from>> ] [ to>> ] bi < ] filter
|
||||
[ unmatchable-class ] [ first|alternation ] if-empty
|
||||
] [
|
||||
2dup <
|
||||
[ character-class-range boa ] [ 2drop unmatchable-class ] if
|
||||
] if ;
|
||||
2dup <
|
||||
[ character-class-range boa ] [ 2drop unmatchable-class ] if ;
|
||||
|
||||
ERROR: unmatched-parentheses ;
|
||||
|
||||
ERROR: bad-option ch ;
|
||||
ERROR: unknown-regexp-option option ;
|
||||
|
||||
: option ( ch -- singleton )
|
||||
: ch>option ( ch -- singleton )
|
||||
{
|
||||
{ CHAR: i [ case-insensitive ] }
|
||||
{ CHAR: d [ unix-lines ] }
|
||||
|
@ -124,13 +102,21 @@ ERROR: bad-option ch ;
|
|||
{ CHAR: s [ dotall ] }
|
||||
{ CHAR: u [ unicode-case ] }
|
||||
{ CHAR: x [ comments ] }
|
||||
[ bad-option ]
|
||||
[ unknown-regexp-option ]
|
||||
} case ;
|
||||
|
||||
: option-on ( option -- ) current-regexp get options>> conjoin ;
|
||||
: option-off ( option -- ) current-regexp get options>> delete-at ;
|
||||
: option>ch ( option -- string )
|
||||
{
|
||||
{ case-insensitive [ CHAR: i ] }
|
||||
{ multiline [ CHAR: m ] }
|
||||
{ reversed-regexp [ CHAR: r ] }
|
||||
{ dotall [ CHAR: s ] }
|
||||
[ unknown-regexp-option ]
|
||||
} case ;
|
||||
|
||||
: toggle-option ( ch ? -- )
|
||||
[ ch>option ] dip option boa push-stack ;
|
||||
|
||||
: toggle-option ( ch ? -- ) [ option ] dip [ option-on ] [ option-off ] if ;
|
||||
: (parse-options) ( string ? -- ) [ toggle-option ] curry each ;
|
||||
|
||||
: parse-options ( string -- )
|
||||
|
@ -176,7 +162,7 @@ DEFER: (parse-regexp)
|
|||
[ drop1 (parse-special-group) ]
|
||||
[ capture-group f nested-parse-regexp ] if ;
|
||||
|
||||
: handle-dot ( -- ) get-dotall any-char any-char-no-nl ? push-stack ;
|
||||
: handle-dot ( -- ) any-char push-stack ;
|
||||
: handle-pipe ( -- ) pipe push-stack ;
|
||||
: (handle-star) ( obj -- kleene-star )
|
||||
peek1 {
|
||||
|
@ -234,11 +220,8 @@ ERROR: invalid-range a b ;
|
|||
[ [ nip at-most-n ] [ at-least-n ] if* ] if
|
||||
] [ drop 0 max exactly-n ] if ;
|
||||
|
||||
: handle-front-anchor ( -- )
|
||||
get-multiline beginning-of-line beginning-of-input ? push-stack ;
|
||||
|
||||
: handle-back-anchor ( -- )
|
||||
get-multiline end-of-line end-of-input ? push-stack ;
|
||||
: handle-front-anchor ( -- ) beginning-of-line push-stack ;
|
||||
: handle-back-anchor ( -- ) end-of-line push-stack ;
|
||||
|
||||
ERROR: bad-character-class obj ;
|
||||
ERROR: expected-posix-class ;
|
||||
|
@ -247,8 +230,8 @@ ERROR: expected-posix-class ;
|
|||
read1 CHAR: { = [ expected-posix-class ] unless
|
||||
"}" read-until [ bad-character-class ] unless
|
||||
{
|
||||
{ "Lower" [ get-case-insensitive Letter-class letter-class ? ] }
|
||||
{ "Upper" [ get-case-insensitive Letter-class LETTER-class ? ] }
|
||||
{ "Lower" [ letter-class ] }
|
||||
{ "Upper" [ LETTER-class ] }
|
||||
{ "Alpha" [ Letter-class ] }
|
||||
{ "ASCII" [ ascii-class ] }
|
||||
{ "Digit" [ digit-class ] }
|
||||
|
@ -412,7 +395,8 @@ DEFER: handle-left-bracket
|
|||
[ first|concatenation ] map first|alternation ;
|
||||
|
||||
: handle-right-parenthesis ( -- )
|
||||
stack dup [ parentheses-group "members" word-prop member? ] find-last -rot cut rest
|
||||
stack dup [ parentheses-group "members" word-prop member? ] find-last
|
||||
-rot cut rest
|
||||
[ [ push ] keep current-regexp get (>>stack) ]
|
||||
[ finish-regexp-parse push-stack ] bi* ;
|
||||
|
||||
|
@ -429,12 +413,9 @@ DEFER: handle-left-bracket
|
|||
{ CHAR: [ [ handle-left-bracket t ] }
|
||||
{ CHAR: \ [ handle-escape t ] }
|
||||
[
|
||||
dup CHAR: $ = peek1 f = and [
|
||||
drop
|
||||
handle-back-anchor f
|
||||
] [
|
||||
push-constant t
|
||||
] if
|
||||
dup CHAR: $ = peek1 f = and
|
||||
[ drop handle-back-anchor f ]
|
||||
[ push-constant t ] if
|
||||
]
|
||||
} case ;
|
||||
|
||||
|
@ -451,7 +432,6 @@ DEFER: handle-left-bracket
|
|||
parse-regexp-beginning (parse-regexp)
|
||||
] with-input-stream
|
||||
] unless-empty
|
||||
current-regexp get
|
||||
stack finish-regexp-parse
|
||||
>>parse-tree drop
|
||||
current-regexp get [ finish-regexp-parse ] change-stack
|
||||
dup stack>> >>parse-tree drop
|
||||
] with-variable ;
|
||||
|
|
|
@ -238,7 +238,7 @@ IN: regexp-tests
|
|||
|
||||
[ t ] [ "abc" <reversed> R/ abc/r matches? ] unit-test
|
||||
[ t ] [ "abc" <reversed> R/ a[bB][cC]/r matches? ] unit-test
|
||||
[ t ] [ "adcbe" R/ a(?r)(bcd)(?-r)e/r matches? ] unit-test
|
||||
[ t ] [ "adcbe" R/ a(?r)(bcd)(?-r)e/ matches? ] unit-test
|
||||
|
||||
[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
|
@ -307,17 +307,30 @@ IN: regexp-tests
|
|||
! Bug in parsing word
|
||||
[ t ] [ "a" R' a' matches? ] unit-test
|
||||
|
||||
! Convert to lowercase until E
|
||||
[ f ] [ "AA" R/ \LAA\E/ matches? ] unit-test
|
||||
[ t ] [ "aa" R/ \LAA\E/ matches? ] unit-test
|
||||
|
||||
! Convert to uppercase until E
|
||||
[ t ] [ "AA" R/ \Uaa\E/ matches? ] unit-test
|
||||
[ f ] [ "aa" R/ \Uaa\E/ matches? ] unit-test
|
||||
|
||||
! [ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with
|
||||
|
||||
[ t ] [ "a" R/ ^a/ matches? ] unit-test
|
||||
[ f ] [ "\na" R/ ^a/ matches? ] unit-test
|
||||
[ f ] [ "\r\na" R/ ^a/ matches? ] unit-test
|
||||
[ f ] [ "\ra" R/ ^a/ matches? ] unit-test
|
||||
! [ t ] [ "a" R/ ^a/ matches? ] unit-test
|
||||
! [ f ] [ "\na" R/ ^a/ matches? ] unit-test
|
||||
! [ f ] [ "\r\na" R/ ^a/ matches? ] unit-test
|
||||
! [ f ] [ "\ra" R/ ^a/ matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" R/ a$/ matches? ] unit-test
|
||||
[ f ] [ "a\n" R/ a$/ matches? ] unit-test
|
||||
[ f ] [ "a\r" R/ a$/ matches? ] unit-test
|
||||
[ f ] [ "a\r\n" R/ a$/ matches? ] unit-test
|
||||
! [ t ] [ "a" R/ a$/ matches? ] unit-test
|
||||
! [ f ] [ "a\n" R/ a$/ matches? ] unit-test
|
||||
! [ f ] [ "a\r" R/ a$/ matches? ] unit-test
|
||||
! [ f ] [ "a\r\n" R/ a$/ matches? ] unit-test
|
||||
|
||||
! [ t ] [ "a" R/ a$|b$/ matches? ] unit-test
|
||||
! [ t ] [ "b" R/ a$|b$/ matches? ] unit-test
|
||||
! [ t ] [ "ab" R/ a$|b$/ matches? ] unit-test
|
||||
! [ t ] [ "ba" R/ ba$|b$/ matches? ] unit-test
|
||||
|
||||
! [ t ] [ "a" R/ \Aa/ matches? ] unit-test
|
||||
! [ f ] [ "\na" R/ \Aaa/ matches? ] unit-test
|
||||
|
@ -347,14 +360,6 @@ IN: regexp-tests
|
|||
! [ t ] [ "\r\na" R/ ^a/m matches? ] unit-test
|
||||
! [ t ] [ "\ra" R/ ^a/m matches? ] unit-test
|
||||
|
||||
! Convert to lowercase until E
|
||||
[ f ] [ "AA" R/ \LAA\E/ matches? ] unit-test
|
||||
[ t ] [ "aa" R/ \LAA\E/ matches? ] unit-test
|
||||
|
||||
! Convert to uppercase until E
|
||||
[ t ] [ "AA" R/ \Uaa\E/ matches? ] unit-test
|
||||
[ f ] [ "aa" R/ \Uaa\E/ matches? ] unit-test
|
||||
|
||||
! [ t ] [ "a" "a$" R/ a$/m matches? ] unit-test
|
||||
! [ t ] [ "a\n" "a$" R/ a$/m matches? ] unit-test
|
||||
! [ t ] [ "a\r" "a$" R/ a$/m matches? ] unit-test
|
||||
|
|
|
@ -16,6 +16,7 @@ IN: regexp
|
|||
H{ } clone >>nfa-traversal-flags
|
||||
H{ } clone >>dfa-traversal-flags
|
||||
H{ } clone >>options
|
||||
H{ } clone >>matchers
|
||||
reset-regexp ;
|
||||
|
||||
: construct-regexp ( regexp -- regexp' )
|
||||
|
@ -93,26 +94,6 @@ IN: regexp
|
|||
{ "R| " "|" }
|
||||
} swap [ subseq? not nip ] curry assoc-find drop ;
|
||||
|
||||
ERROR: unknown-regexp-option option ;
|
||||
|
||||
: option>ch ( option -- string )
|
||||
{
|
||||
{ case-insensitive [ CHAR: i ] }
|
||||
{ multiline [ CHAR: m ] }
|
||||
{ reversed-regexp [ CHAR: r ] }
|
||||
{ dotall [ CHAR: s ] }
|
||||
[ unknown-regexp-option ]
|
||||
} case ;
|
||||
|
||||
: ch>option ( ch -- option )
|
||||
{
|
||||
{ CHAR: i [ case-insensitive ] }
|
||||
{ CHAR: m [ multiline ] }
|
||||
{ CHAR: r [ reversed-regexp ] }
|
||||
{ CHAR: s [ dotall ] }
|
||||
[ unknown-regexp-option ]
|
||||
} case ;
|
||||
|
||||
: string>options ( string -- options )
|
||||
[ ch>option dup ] H{ } map>assoc ;
|
||||
|
||||
|
|
|
@ -26,23 +26,6 @@ IN: regexp.utils
|
|||
: ?insert-at ( value key hash/f -- hash )
|
||||
[ H{ } clone ] unless* [ insert-at ] keep ;
|
||||
|
||||
: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
|
||||
: push1 ( obj -- ) input-stream get stream>> push ;
|
||||
: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
|
||||
: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ;
|
||||
: drop1 ( -- ) read1 drop ;
|
||||
|
||||
: stack ( -- obj ) current-regexp get stack>> ;
|
||||
: change-whole-stack ( quot -- )
|
||||
current-regexp get
|
||||
[ stack>> swap call ] keep (>>stack) ; inline
|
||||
: push-stack ( obj -- ) stack push ;
|
||||
: pop-stack ( -- obj ) stack pop ;
|
||||
: cut-out ( vector n -- vector' vector ) cut rest ;
|
||||
ERROR: cut-stack-error ;
|
||||
: cut-stack ( obj vector -- vector' vector )
|
||||
tuck last-index [ cut-stack-error ] unless* cut-out swap ;
|
||||
|
||||
ERROR: bad-octal number ;
|
||||
ERROR: bad-hex number ;
|
||||
: check-octal ( octal -- octal ) dup 255 > [ bad-octal ] when ;
|
||||
|
|
Loading…
Reference in New Issue