Lookaround and anchors work! (still need to fix some bugs)
parent
39011fd062
commit
a487ed0f32
|
@ -30,6 +30,7 @@ IN: regexp.classes.tests
|
|||
[ T{ or-class { seq { 3 2 } } } ] [ { 2 3 } <or-class> 1 <not-class> 2array <and-class> ] unit-test
|
||||
[ f ] [ t <not-class> ] unit-test
|
||||
[ t ] [ f <not-class> ] unit-test
|
||||
[ f ] [ 1 <not-class> 1 t replace-question ] unit-test
|
||||
|
||||
! Making classes into nested conditionals
|
||||
|
||||
|
@ -43,7 +44,7 @@ IN: regexp.classes.tests
|
|||
SYMBOL: foo
|
||||
SYMBOL: bar
|
||||
|
||||
[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { 1 t } { 2 T{ primitive-class f foo } } { 3 T{ primitive-class f bar } } } table>condition ] unit-test
|
||||
[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 3 2 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { 1 t } { 3 T{ primitive-class f bar } } { 2 T{ primitive-class f foo } } } table>condition ] unit-test
|
||||
|
||||
[ t ] [ foo <primitive-class> dup t replace-question ] unit-test
|
||||
[ f ] [ foo <primitive-class> dup f replace-question ] unit-test
|
||||
|
|
|
@ -12,7 +12,7 @@ ascii-class punctuation-class java-printable-class blank-class
|
|||
control-character-class hex-digit-class java-blank-class c-identifier-class
|
||||
unmatchable-class terminator-class word-boundary-class ;
|
||||
|
||||
SINGLETONS: beginning-of-input ^ end-of-input $ ;
|
||||
SINGLETONS: beginning-of-input ^ end-of-input $ end-of-file ;
|
||||
|
||||
TUPLE: range from to ;
|
||||
C: <range> range
|
||||
|
@ -233,7 +233,7 @@ M: or-class replace-question
|
|||
replace-compound <or-class> ;
|
||||
|
||||
M: not-class replace-question
|
||||
class>> replace-question <not-class> ;
|
||||
[ class>> ] 2dip replace-question <not-class> ;
|
||||
|
||||
: answer ( table question answer -- new-table )
|
||||
'[ _ _ replace-question ] assoc-map
|
||||
|
@ -258,7 +258,7 @@ M: not-class class>questions class>> class>questions ;
|
|||
M: object class>questions 1array ;
|
||||
|
||||
: table>questions ( table -- questions )
|
||||
values <and-class> class>questions t swap remove ;
|
||||
values [ class>questions ] gather >array t swap remove ;
|
||||
|
||||
: table>condition ( table -- condition )
|
||||
! input table is state => class
|
||||
|
@ -269,3 +269,12 @@ M: object class>questions 1array ;
|
|||
[ [ question>> ] [ yes>> ] [ no>> ] tri ] dip
|
||||
'[ _ condition-map ] bi@ <condition>
|
||||
] [ call ] if ; inline recursive
|
||||
|
||||
: condition-states ( condition -- states )
|
||||
dup condition? [
|
||||
[ yes>> ] [ no>> ] bi
|
||||
[ condition-states ] bi@ append prune
|
||||
] [ 1array ] if ;
|
||||
|
||||
: condition-at ( condition assoc -- new-condition )
|
||||
'[ _ at ] condition-map ;
|
||||
|
|
|
@ -3,27 +3,76 @@
|
|||
USING: regexp.classes kernel sequences regexp.negation
|
||||
quotations regexp.minimize assocs fry math locals combinators
|
||||
accessors words compiler.units kernel.private strings
|
||||
sequences.private arrays regexp.matchers call ;
|
||||
sequences.private arrays regexp.matchers call namespaces
|
||||
regexp.transition-tables combinators.short-circuit ;
|
||||
IN: regexp.compiler
|
||||
|
||||
: literals>cases ( literal-transitions -- case-body )
|
||||
[ 1quotation ] assoc-map ;
|
||||
GENERIC: question>quot ( question -- quot )
|
||||
|
||||
<PRIVATE
|
||||
|
||||
SYMBOL: shortest?
|
||||
SYMBOL: backwards?
|
||||
|
||||
M: t question>quot drop [ 2drop t ] ;
|
||||
|
||||
M: beginning-of-input question>quot
|
||||
drop [ drop zero? ] ;
|
||||
|
||||
M: end-of-input question>quot
|
||||
drop [ length = ] ;
|
||||
|
||||
M: end-of-file question>quot
|
||||
drop [
|
||||
{
|
||||
[ length swap - 2 <= ]
|
||||
[ swap tail { "\n" "\r\n" "\r" "" } member? ]
|
||||
} 2&&
|
||||
[ [ nip [ length ] keep ] when ] keep
|
||||
] ;
|
||||
|
||||
M: $ question>quot
|
||||
drop [ { [ length = ] [ ?nth "\r\n" member? ] } 2|| ] ;
|
||||
|
||||
M: ^ question>quot
|
||||
drop [ { [ drop zero? ] [ [ 1- ] dip ?nth "\r\n" member? ] } 2|| ] ;
|
||||
|
||||
! Maybe the condition>quot things can be combined, given a suitable method
|
||||
! for question>quot on classes, but maybe that'd make stack shuffling annoying
|
||||
|
||||
: execution-quot ( next-state -- quot )
|
||||
! The conditions here are for lookaround and anchors, etc
|
||||
dup condition? [
|
||||
[ question>> question>quot ] [ yes>> ] [ no>> ] tri
|
||||
[ execution-quot ] bi@
|
||||
'[ 2dup @ _ _ if ]
|
||||
] [
|
||||
! There shouldn't be a condition like this!
|
||||
dup sequence?
|
||||
[ [ [ 2drop ] ] [ first '[ _ execute ] ] if-empty ]
|
||||
[ '[ _ execute ] ] if
|
||||
] if ;
|
||||
|
||||
TUPLE: box contents ;
|
||||
C: <box> box
|
||||
|
||||
: condition>quot ( condition -- quot )
|
||||
! Conditions here are for different classes
|
||||
dup condition? [
|
||||
[ question>> ] [ yes>> ] [ no>> ] tri
|
||||
[ condition>quot ] bi@
|
||||
'[ dup _ class-member? _ _ if ]
|
||||
] [
|
||||
[ [ 3drop ] ] [ '[ drop _ execute ] ] if-empty
|
||||
contents>>
|
||||
[ [ 3drop ] ] [ execution-quot '[ drop @ ] ] if-empty
|
||||
] if ;
|
||||
|
||||
: new-non-literals>dispatch ( non-literal-transitions -- quot )
|
||||
table>condition condition>quot ;
|
||||
|
||||
: non-literals>dispatch ( non-literal-transitions -- quot )
|
||||
[ [ '[ dup _ class-member? ] ] [ '[ drop _ execute ] ] bi* ] assoc-map
|
||||
[ 3drop ] suffix '[ _ cond ] ;
|
||||
[ swap ] assoc-map ! we want state => predicate, and get the opposite as input
|
||||
table>condition [ <box> ] condition-map condition>quot ;
|
||||
|
||||
: literals>cases ( literal-transitions -- case-body )
|
||||
[ execution-quot ] assoc-map ;
|
||||
|
||||
: expand-one-or ( or-class transition -- alist )
|
||||
[ seq>> ] dip '[ _ 2array ] map ;
|
||||
|
@ -38,17 +87,22 @@ IN: regexp.compiler
|
|||
>alist expand-or [ first integer? ] partition
|
||||
[ literals>cases ] [ non-literals>dispatch ] bi* ;
|
||||
|
||||
:: step ( last-match index str case-body final? -- last-index/f )
|
||||
:: step ( last-match index str quot final? direction -- last-index/f )
|
||||
final? index last-match ?
|
||||
index str bounds-check? [
|
||||
index 1+ str
|
||||
index direction + str
|
||||
index str nth-unsafe
|
||||
case-body case
|
||||
quot call
|
||||
] when ; inline
|
||||
|
||||
: direction ( -- n )
|
||||
backwards? get -1 1 ? ;
|
||||
|
||||
: transitions>quot ( transitions final-state? -- quot )
|
||||
[ split-literals suffix ] dip
|
||||
'[ { array-capacity sequence } declare _ _ step ] ;
|
||||
dup shortest? get and [ 2drop [ drop nip ] ] [
|
||||
[ split-literals swap case>quot ] dip direction
|
||||
'[ { array-capacity string } declare _ _ _ step ]
|
||||
] if ;
|
||||
|
||||
: word>quot ( word dfa -- quot )
|
||||
[ transitions>> at ]
|
||||
|
@ -64,30 +118,37 @@ IN: regexp.compiler
|
|||
] each
|
||||
] with-compilation-unit ;
|
||||
|
||||
: transitions-at ( transitions assoc -- new-transitions )
|
||||
dup '[
|
||||
[ _ at ]
|
||||
[ [ _ at ] assoc-map ] bi*
|
||||
] assoc-map ;
|
||||
|
||||
: states>words ( dfa -- words dfa )
|
||||
dup transitions>> keys [ gensym ] H{ } map>assoc
|
||||
[ [ transitions-at ] rewrite-transitions ]
|
||||
[ transitions-at ]
|
||||
[ values ]
|
||||
bi swap ;
|
||||
|
||||
: dfa>word ( dfa -- word )
|
||||
states>words [ states>code ] keep start-state>> ;
|
||||
|
||||
: check-sequence ( string -- string )
|
||||
: check-string ( string -- string )
|
||||
! Make this configurable
|
||||
dup sequence? [ "String required" throw ] unless ;
|
||||
dup string? [ "String required" throw ] unless ;
|
||||
|
||||
: run-regexp ( start-index string word -- ? )
|
||||
{ [ f ] [ >fixnum ] [ check-sequence ] [ execute ] } spread ; inline
|
||||
: setup-regexp ( start-index string -- f start-index string )
|
||||
[ f ] [ >fixnum ] [ check-string ] tri* ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
! The quotation returned is ( start-index string -- i/f )
|
||||
|
||||
: dfa>quotation ( dfa -- quot )
|
||||
dfa>word '[ _ run-regexp ] ;
|
||||
dfa>word execution-quot '[ setup-regexp @ ] ;
|
||||
|
||||
: dfa>shortest-quotation ( dfa -- quot )
|
||||
t shortest? [ dfa>quotation ] with-variable ;
|
||||
|
||||
: dfa>reverse-quotation ( dfa -- quot )
|
||||
t backwards? [ dfa>quotation ] with-variable ;
|
||||
|
||||
: dfa>reverse-shortest-quotation ( dfa -- quot )
|
||||
t backwards? [ dfa>shortest-quotation ] with-variable ;
|
||||
|
||||
TUPLE: quot-matcher quot ;
|
||||
C: <quot-matcher> quot-matcher
|
||||
|
|
|
@ -39,21 +39,26 @@ IN: regexp.dfa
|
|||
|
||||
: find-transitions ( dfa-state nfa -- next-dfa-state )
|
||||
transitions>>
|
||||
'[ _ at keys ] gather
|
||||
epsilon swap remove ;
|
||||
'[ _ at keys [ condition-states ] map concat ] gather
|
||||
[ tagged-epsilon? not ] filter ;
|
||||
|
||||
: add-todo-state ( state visited-states new-states -- )
|
||||
3dup drop key? [ 3drop ] [
|
||||
[ conjoin ] [ push ] bi-curry* bi
|
||||
] if ;
|
||||
|
||||
: add-todo-states ( state/condition visited-states new-states -- )
|
||||
[ condition-states ] 2dip
|
||||
'[ _ _ add-todo-state ] each ;
|
||||
|
||||
:: new-transitions ( nfa dfa new-states visited-states -- nfa dfa )
|
||||
new-states [ nfa dfa ] [
|
||||
pop :> state
|
||||
state dfa transitions>> maybe-initialize-key
|
||||
state nfa find-transitions
|
||||
[| trans |
|
||||
state trans nfa find-closure :> new-state
|
||||
new-state visited-states new-states add-todo-state
|
||||
new-state visited-states new-states add-todo-states
|
||||
state new-state trans dfa set-transition
|
||||
] each
|
||||
nfa dfa new-states visited-states new-transitions
|
||||
|
@ -73,7 +78,7 @@ IN: regexp.dfa
|
|||
|
||||
: construct-dfa ( nfa -- dfa )
|
||||
dup initialize-dfa
|
||||
dup start-state>> 1vector
|
||||
dup start-state>> condition-states >vector
|
||||
H{ } clone
|
||||
new-transitions
|
||||
[ set-final-states ] keep ;
|
||||
|
|
|
@ -42,6 +42,3 @@ TUPLE: parts in out ;
|
|||
] preserving-epsilon
|
||||
] assoc-map
|
||||
] change-transitions ;
|
||||
|
||||
: nfa>dfa ( nfa -- dfa )
|
||||
disambiguate construct-dfa minimize ;
|
||||
|
|
|
@ -5,29 +5,11 @@ accessors locals math sorting arrays sets hashtables regexp.dfa
|
|||
combinators.short-circuit regexp.classes ;
|
||||
IN: regexp.minimize
|
||||
|
||||
: number-transitions ( transitions numbering -- new-transitions )
|
||||
dup '[
|
||||
[ _ at ]
|
||||
[ [ [ _ at ] condition-map ] assoc-map ] bi*
|
||||
] assoc-map ;
|
||||
|
||||
: table>state-numbers ( table -- assoc )
|
||||
transitions>> keys <enum> [ swap ] H{ } assoc-map-as ;
|
||||
|
||||
: map-set ( assoc quot -- new-assoc )
|
||||
'[ drop @ dup ] assoc-map ; inline
|
||||
|
||||
: rewrite-transitions ( transition-table assoc quot -- transition-table )
|
||||
[
|
||||
[ clone ] dip
|
||||
[ '[ _ at ] change-start-state ]
|
||||
[ '[ [ _ at ] map-set ] change-final-states ]
|
||||
[ ] tri
|
||||
] dip '[ _ @ ] change-transitions ; inline
|
||||
|
||||
: number-states ( table -- newtable )
|
||||
dup table>state-numbers
|
||||
[ number-transitions ] rewrite-transitions ;
|
||||
dup table>state-numbers transitions-at ;
|
||||
|
||||
: no-conditions? ( state transition-table -- ? )
|
||||
transitions>> at values [ condition? ] any? not ;
|
||||
|
@ -103,4 +85,4 @@ IN: regexp.minimize
|
|||
[ combine-transitions ] rewrite-transitions ;
|
||||
|
||||
: minimize ( table -- minimal-table )
|
||||
clone number-states combine-states ;
|
||||
clone number-states ; ! combine-states ;
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: regexp.nfa regexp.disambiguate kernel sequences
|
||||
assocs regexp.classes hashtables accessors fry vectors
|
||||
regexp.ast regexp.transition-tables regexp.minimize namespaces ;
|
||||
regexp.ast regexp.transition-tables regexp.minimize
|
||||
regexp.dfa namespaces ;
|
||||
IN: regexp.negation
|
||||
|
||||
: ast>dfa ( parse-tree -- minimal-dfa )
|
||||
construct-nfa nfa>dfa ;
|
||||
construct-nfa disambiguate construct-dfa minimize ;
|
||||
|
||||
CONSTANT: fail-state -1
|
||||
|
||||
|
@ -33,15 +34,9 @@ CONSTANT: fail-state -1
|
|||
[ add-fail-state ] change-transitions
|
||||
dup inverse-final-states >>final-states ;
|
||||
|
||||
: renumber-transitions ( transitions numbering -- new-transitions )
|
||||
dup '[
|
||||
[ _ at ]
|
||||
[ [ [ _ at ] map ] assoc-map ] bi*
|
||||
] assoc-map ;
|
||||
|
||||
: renumber-states ( transition-table -- transition-table )
|
||||
dup transitions>> keys [ next-state ] H{ } map>assoc
|
||||
[ renumber-transitions ] rewrite-transitions ;
|
||||
transitions-at ;
|
||||
|
||||
: box-transitions ( transition-table -- transition-table )
|
||||
[ [ [ 1vector ] assoc-map ] assoc-map ] change-transitions ;
|
||||
|
|
|
@ -56,9 +56,16 @@ M:: star nfa-node ( node -- start end )
|
|||
s2 s3 ;
|
||||
|
||||
GENERIC: modify-epsilon ( tag -- newtag )
|
||||
! Potential off-by-one errors when lookaround nested in lookbehind
|
||||
|
||||
M: object modify-epsilon ;
|
||||
|
||||
M: $ modify-epsilon
|
||||
multiline option? [ drop end-of-input ] unless ;
|
||||
|
||||
M: ^ modify-epsilon
|
||||
multiline option? [ drop beginning-of-input ] unless ;
|
||||
|
||||
M: tagged-epsilon nfa-node
|
||||
clone [ modify-epsilon ] change-tag add-simple-entry ;
|
||||
|
||||
|
|
|
@ -54,6 +54,7 @@ ERROR: bad-class name ;
|
|||
{ CHAR: D [ digit-class <primitive-class> <not-class> ] }
|
||||
|
||||
{ CHAR: z [ end-of-input <tagged-epsilon> ] }
|
||||
{ CHAR: Z [ end-of-file <tagged-epsilon> ] }
|
||||
{ CHAR: A [ beginning-of-input <tagged-epsilon> ] }
|
||||
[ ]
|
||||
} case ;
|
||||
|
|
|
@ -45,9 +45,9 @@ IN: regexp-tests
|
|||
! Dotall mode -- when on, . matches newlines.
|
||||
! Off by default.
|
||||
[ f ] [ "\n" "." <regexp> matches? ] unit-test
|
||||
! [ t ] [ "\n" "(?s)." <regexp> matches? ] unit-test
|
||||
[ t ] [ "\n" "(?s:.)" <regexp> matches? ] unit-test
|
||||
[ t ] [ "\n" R/ ./s matches? ] unit-test
|
||||
! [ f ] [ "\n\n" "(?s).(?-s)." <regexp> matches? ] unit-test
|
||||
[ f ] [ "\n\n" "(?s:.)." <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" ".+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" ".+" <regexp> matches? ] unit-test
|
||||
|
@ -221,17 +221,15 @@ IN: regexp-tests
|
|||
[ t ] [ "c" R/ [A-Z]/i matches? ] unit-test
|
||||
[ f ] [ "3" R/ [A-Z]/i matches? ] unit-test
|
||||
|
||||
/*
|
||||
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "(?i:a)" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "(?i:a)" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "(?i:a)" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "(?i:a)" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" R/ (?-i)a/i matches? ] unit-test
|
||||
[ t ] [ "a" R/ (?-i)a/i matches? ] unit-test
|
||||
[ f ] [ "A" R/ (?-i)a/i matches? ] unit-test
|
||||
[ f ] [ "A" R/ (?-i)a/i matches? ] unit-test
|
||||
*/
|
||||
[ t ] [ "a" R/ (?-i:a)/i matches? ] unit-test
|
||||
[ t ] [ "a" R/ (?-i:a)/i matches? ] unit-test
|
||||
[ f ] [ "A" R/ (?-i:a)/i matches? ] unit-test
|
||||
[ f ] [ "A" R/ (?-i:a)/i matches? ] unit-test
|
||||
|
||||
[ f ] [ "A" "[a-z]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" R/ [a-z]/i matches? ] unit-test
|
||||
|
@ -242,8 +240,8 @@ IN: regexp-tests
|
|||
[ t ] [ "abc" reverse R/ abc/r matches? ] unit-test
|
||||
[ t ] [ "abc" reverse R/ a[bB][cC]/r matches? ] unit-test
|
||||
|
||||
[ t ] [ "xabc" R/ abc/ <reverse-matcher> match-index-head >boolean ] unit-test
|
||||
[ t ] [ "xabc" R/ a[bB][cC]/ <reverse-matcher> match-index-head >boolean ] unit-test
|
||||
[ t ] [ 3 "xabc" R/ abc/ <reverse-matcher> match-index-from >boolean ] unit-test
|
||||
[ t ] [ 3 "xabc" R/ a[bB][cC]/ <reverse-matcher> match-index-from >boolean ] unit-test
|
||||
|
||||
[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
|
@ -276,10 +274,6 @@ IN: regexp-tests
|
|||
|
||||
[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> match-head >string ] unit-test
|
||||
|
||||
! [ t ] [ "a:b" ".+:?" <regexp> matches? ] unit-test
|
||||
|
||||
! [ 1 ] [ "hello" ".+?" <regexp> match length ] unit-test
|
||||
|
||||
[ { "1" "2" "3" "4" } ]
|
||||
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
|
||||
|
||||
|
@ -304,18 +298,16 @@ IN: regexp-tests
|
|||
|
||||
[ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
||||
|
||||
/*
|
||||
[ f ] [ "ab" "a(?!b)" <regexp> match-head ] unit-test
|
||||
[ "a" ] [ "ac" "a(?!b)" <regexp> match-head >string ] unit-test
|
||||
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> match-head >string ] unit-test
|
||||
[ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> match-head >string ] unit-test
|
||||
[ "a" ] [ "cab" "a(?=b)(?<=c)" <regexp> match-head >string ] unit-test
|
||||
[ "a" ] [ "ba" "(?<=b)(?<=b)a" <regexp> match-head >string ] unit-test
|
||||
[ "a" ] [ "cab" "(?<=c)a(?=b)" <regexp> match-head >string ] unit-test
|
||||
|
||||
[ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-index-head ] unit-test
|
||||
[ f ] [ "foobxr" "foo(?=bar)" <regexp> match-index-head ] unit-test
|
||||
*/
|
||||
|
||||
! Bug in parsing word
|
||||
[ t ] [ "a" R' a' matches? ] unit-test
|
||||
|
@ -349,56 +341,70 @@ IN: regexp-tests
|
|||
[ f ] [ "foo" <regexp> dfa>> >boolean ] unit-test
|
||||
[ t ] [ R/ foo/ dfa>> >boolean ] unit-test
|
||||
|
||||
! [ t ] [ "a" R/ ^a/ matches? ] unit-test
|
||||
! [ f ] [ "\na" R/ ^a/ matches? ] unit-test
|
||||
! [ f ] [ "\r\na" R/ ^a/ matches? ] unit-test
|
||||
! [ f ] [ "\ra" R/ ^a/ matches? ] unit-test
|
||||
[ t ] [ "a" R/ ^a/ matches? ] unit-test
|
||||
[ f ] [ "\na" R/ ^a/ matches? ] unit-test
|
||||
[ f ] [ "\r\na" R/ ^a/ matches? ] unit-test
|
||||
[ f ] [ "\ra" R/ ^a/ matches? ] unit-test
|
||||
|
||||
! [ t ] [ "a" R/ a$/ matches? ] unit-test
|
||||
! [ f ] [ "a\n" R/ a$/ matches? ] unit-test
|
||||
! [ f ] [ "a\r" R/ a$/ matches? ] unit-test
|
||||
! [ f ] [ "a\r\n" R/ a$/ matches? ] unit-test
|
||||
[ 1 ] [ "a" R/ ^a/ count-matches ] unit-test
|
||||
[ 0 ] [ "\na" R/ ^a/ count-matches ] unit-test
|
||||
[ 0 ] [ "\r\na" R/ ^a/ count-matches ] unit-test
|
||||
[ 0 ] [ "\ra" R/ ^a/ count-matches ] unit-test
|
||||
|
||||
! [ t ] [ "a" R/ a$|b$/ matches? ] unit-test
|
||||
! [ t ] [ "b" R/ a$|b$/ matches? ] unit-test
|
||||
! [ t ] [ "ab" R/ a$|b$/ matches? ] unit-test
|
||||
! [ t ] [ "ba" R/ ba$|b$/ matches? ] unit-test
|
||||
[ t ] [ "a" R/ a$/ matches? ] unit-test
|
||||
[ f ] [ "a\n" R/ a$/ matches? ] unit-test
|
||||
[ f ] [ "a\r" R/ a$/ matches? ] unit-test
|
||||
[ f ] [ "a\r\n" R/ a$/ matches? ] unit-test
|
||||
|
||||
! [ t ] [ "a" R/ \Aa/ matches? ] unit-test
|
||||
! [ f ] [ "\na" R/ \Aaa/ matches? ] unit-test
|
||||
! [ f ] [ "\r\na" R/ \Aa/ matches? ] unit-test
|
||||
! [ f ] [ "\ra" R/ \Aa/ matches? ] unit-test
|
||||
[ 1 ] [ "a" R/ a$/ count-matches ] unit-test
|
||||
[ 0 ] [ "a\n" R/ a$/ count-matches ] unit-test
|
||||
[ 0 ] [ "a\r" R/ a$/ count-matches ] unit-test
|
||||
[ 0 ] [ "a\r\n" R/ a$/ count-matches ] unit-test
|
||||
|
||||
! [ t ] [ "a" R/ \Aa/m matches? ] unit-test
|
||||
! [ f ] [ "\na" R/ \Aaa/m matches? ] unit-test
|
||||
! [ f ] [ "\r\na" R/ \Aa/m matches? ] unit-test
|
||||
! [ f ] [ "\ra" R/ \Aa/m matches? ] unit-test
|
||||
[ t ] [ "a" R/ a$|b$/ matches? ] unit-test
|
||||
[ t ] [ "b" R/ a$|b$/ matches? ] unit-test
|
||||
[ f ] [ "ab" R/ a$|b$/ matches? ] unit-test
|
||||
[ t ] [ "ba" R/ ba$|b$/ matches? ] unit-test
|
||||
|
||||
! [ t ] [ "\r\n\n\n\nam" R/ ^am/m matches? ] unit-test
|
||||
[ t ] [ "a" R/ \Aa/ matches? ] unit-test
|
||||
[ f ] [ "\na" R/ \Aaa/ matches? ] unit-test
|
||||
[ f ] [ "\r\na" R/ \Aa/ matches? ] unit-test
|
||||
[ f ] [ "\ra" R/ \Aa/ matches? ] unit-test
|
||||
|
||||
! [ t ] [ "a" R/ \Aa\z/m matches? ] unit-test
|
||||
! [ f ] [ "a\n" R/ \Aa\z/m matches? ] unit-test
|
||||
[ t ] [ "a" R/ \Aa/m matches? ] unit-test
|
||||
[ f ] [ "\na" R/ \Aaa/m matches? ] unit-test
|
||||
[ f ] [ "\r\na" R/ \Aa/m matches? ] unit-test
|
||||
[ f ] [ "\ra" R/ \Aa/m matches? ] unit-test
|
||||
[ 0 ] [ "\ra" R/ \Aa/m count-matches ] unit-test
|
||||
|
||||
! [ t ] [ "a\r\n" R/ \Aa\Z/m matches? ] unit-test
|
||||
! [ t ] [ "a\n" R/ \Aa\Z/m matches? ] unit-test
|
||||
[ f ] [ "\r\n\n\n\nam" R/ ^am/m matches? ] unit-test
|
||||
[ 1 ] [ "\r\n\n\n\nam" R/ ^am/m count-matches ] unit-test
|
||||
|
||||
! [ t ] [ "a" R/ \Aa\Z/m matches? ] unit-test
|
||||
! [ f ] [ "\na" R/ \Aaa\Z/m matches? ] unit-test
|
||||
! [ f ] [ "\r\na" R/ \Aa\Z/m matches? ] unit-test
|
||||
! [ f ] [ "\ra" R/ \Aa\Z/m matches? ] unit-test
|
||||
[ t ] [ "a" R/ \Aa\z/m matches? ] unit-test
|
||||
[ f ] [ "a\n" R/ \Aa\z/m matches? ] unit-test
|
||||
|
||||
! [ t ] [ "a" R/ ^a/m matches? ] unit-test
|
||||
! [ t ] [ "\na" R/ ^a/m matches? ] unit-test
|
||||
! [ t ] [ "\r\na" R/ ^a/m matches? ] unit-test
|
||||
! [ t ] [ "\ra" R/ ^a/m matches? ] unit-test
|
||||
[ t ] [ "a\r\n" R/ \Aa\Z/m matches? ] unit-test
|
||||
[ t ] [ "a\n" R/ \Aa\Z/m matches? ] unit-test
|
||||
|
||||
! [ t ] [ "a" R/ a$/m matches? ] unit-test
|
||||
! [ t ] [ "a\n" R/ a$/m matches? ] unit-test
|
||||
! [ t ] [ "a\r" R/ a$/m matches? ] unit-test
|
||||
! [ t ] [ "a\r\n" R/ a$/m matches? ] unit-test
|
||||
[ t ] [ "a" R/ \Aa\Z/m matches? ] unit-test
|
||||
[ f ] [ "\na" R/ \Aaa\Z/m matches? ] unit-test
|
||||
[ f ] [ "\r\na" R/ \Aa\Z/m matches? ] unit-test
|
||||
[ f ] [ "\ra" R/ \Aa\Z/m matches? ] unit-test
|
||||
|
||||
! [ f ] [ "foobxr" "foo\\z" <regexp> match-index-head ] unit-test
|
||||
! [ 3 ] [ "foo" "foo\\z" <regexp> match-index-head ] unit-test
|
||||
[ t ] [ "a" R/ ^a/m matches? ] unit-test
|
||||
[ f ] [ "\na" R/ ^a/m matches? ] unit-test
|
||||
[ 1 ] [ "\na" R/ ^a/m count-matches ] unit-test
|
||||
[ 1 ] [ "\r\na" R/ ^a/m count-matches ] unit-test
|
||||
[ 1 ] [ "\ra" R/ ^a/m count-matches ] unit-test
|
||||
|
||||
[ t ] [ "a" R/ a$/m matches? ] unit-test
|
||||
[ f ] [ "a\n" R/ a$/m matches? ] unit-test
|
||||
[ 1 ] [ "a\n" R/ a$/m count-matches ] unit-test
|
||||
[ 1 ] [ "a\r" R/ a$/m count-matches ] unit-test
|
||||
[ 1 ] [ "a\r\n" R/ a$/m count-matches ] unit-test
|
||||
|
||||
[ f ] [ "foobxr" "foo\\z" <regexp> match-index-head ] unit-test
|
||||
[ 3 ] [ "foo" "foo\\z" <regexp> match-index-head ] unit-test
|
||||
|
||||
! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
USING: accessors combinators kernel math sequences strings sets
|
||||
assocs prettyprint.backend prettyprint.custom make lexer
|
||||
namespaces parser arrays fry locals regexp.minimize
|
||||
regexp.parser regexp.nfa regexp.dfa
|
||||
regexp.parser regexp.nfa regexp.dfa regexp.classes
|
||||
regexp.transition-tables splitting sorting regexp.ast
|
||||
regexp.negation regexp.matchers regexp.compiler ;
|
||||
IN: regexp
|
||||
|
@ -27,6 +27,7 @@ TUPLE: regexp
|
|||
|
||||
TUPLE: reverse-matcher regexp ;
|
||||
C: <reverse-matcher> reverse-matcher
|
||||
! Reverse matchers won't work properly with most combinators, for now
|
||||
|
||||
<PRIVATE
|
||||
|
||||
|
@ -39,21 +40,31 @@ C: <reverse-matcher> reverse-matcher
|
|||
: <reversed-option> ( ast -- reversed )
|
||||
"r" string>options <with-options> ;
|
||||
|
||||
M: lookahead question>quot ! Returns ( index string -- ? )
|
||||
term>> ast>dfa dfa>shortest-quotation ;
|
||||
|
||||
M: lookbehind question>quot ! Returns ( index string -- ? )
|
||||
term>> <reversed-option>
|
||||
ast>dfa dfa>reverse-shortest-quotation
|
||||
[ [ 1- ] dip ] prepose ;
|
||||
|
||||
: compile-reverse ( regexp -- regexp )
|
||||
dup '[
|
||||
[
|
||||
_ get-ast <reversed-option>
|
||||
ast>dfa dfa>quotation
|
||||
ast>dfa dfa>reverse-quotation
|
||||
] unless*
|
||||
] change-reverse-dfa ;
|
||||
|
||||
M: regexp match-index-from ( string regexp -- index/f )
|
||||
M: regexp match-index-from
|
||||
compile-regexp dfa>> <quot-matcher> match-index-from ;
|
||||
|
||||
M: reverse-matcher match-index-from ( string regexp -- index/f )
|
||||
[ <reversed> ] [ regexp>> compile-reverse reverse-dfa>> ] bi*
|
||||
M: reverse-matcher match-index-from
|
||||
regexp>> compile-reverse reverse-dfa>>
|
||||
<quot-matcher> match-index-from ;
|
||||
|
||||
! The following two should do some caching
|
||||
|
||||
: find-regexp-syntax ( string -- prefix suffix )
|
||||
{
|
||||
{ "R/ " "/" }
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs fry hashtables kernel sequences
|
||||
vectors locals ;
|
||||
vectors locals regexp.classes ;
|
||||
IN: regexp.transition-tables
|
||||
|
||||
TUPLE: transition-table transitions start-state final-states ;
|
||||
|
@ -12,10 +12,11 @@ TUPLE: transition-table transitions start-state final-states ;
|
|||
H{ } clone >>final-states ;
|
||||
|
||||
: maybe-initialize-key ( key hashtable -- )
|
||||
! Why do we have to do this?
|
||||
2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ;
|
||||
|
||||
:: (set-transition) ( from to obj hash -- )
|
||||
to hash maybe-initialize-key
|
||||
to condition? [ to hash maybe-initialize-key ] unless
|
||||
from hash at
|
||||
[ [ to obj ] dip set-at ]
|
||||
[ to obj associate from hash set-at ] if* ;
|
||||
|
@ -31,3 +32,23 @@ TUPLE: transition-table transitions start-state final-states ;
|
|||
|
||||
: add-transition ( from to obj transition-table -- )
|
||||
transitions>> (add-transition) ;
|
||||
|
||||
: map-set ( assoc quot -- new-assoc )
|
||||
'[ drop @ dup ] assoc-map ; inline
|
||||
|
||||
: rewrite-transitions ( transition-table assoc quot -- transition-table )
|
||||
[
|
||||
[ clone ] dip
|
||||
[ '[ _ condition-at ] change-start-state ]
|
||||
[ '[ [ _ at ] map-set ] change-final-states ]
|
||||
[ ] tri
|
||||
] dip '[ _ @ ] change-transitions ; inline
|
||||
|
||||
: number-transitions ( transitions numbering -- new-transitions )
|
||||
dup '[
|
||||
[ _ at ]
|
||||
[ [ _ condition-at ] assoc-map ] bi*
|
||||
] assoc-map ;
|
||||
|
||||
: transitions-at ( transitions numbering -- transitions )
|
||||
[ number-transitions ] rewrite-transitions ;
|
||||
|
|
Loading…
Reference in New Issue