Unfinished changes for regexp lookaround
parent
4f306518dc
commit
ca19a1b728
|
@ -3,6 +3,8 @@
|
|||
USING: regexp.classes tools.test arrays kernel ;
|
||||
IN: regexp.classes.tests
|
||||
|
||||
! Class algebra
|
||||
|
||||
[ f ] [ { 1 2 } <and-class> ] unit-test
|
||||
[ T{ or-class f { 2 1 } } ] [ { 1 2 } <or-class> ] unit-test
|
||||
[ 3 ] [ { 1 2 } <and-class> 3 2array <or-class> ] unit-test
|
||||
|
@ -25,3 +27,28 @@ IN: regexp.classes.tests
|
|||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
|
||||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
|
||||
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
|
||||
|
||||
! Making classes into nested conditionals
|
||||
|
||||
[ V{ 1 2 3 4 } ] [ T{ and-class f { 1 T{ not-class f 2 } T{ or-class f { 3 4 } } 2 } } class>questions ] unit-test
|
||||
[ { 3 } ] [ { { t 3 } } table>condition ] unit-test
|
||||
[ { T{ primitive-class } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>questions ] unit-test
|
||||
[ { { t 1 } { t 2 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } t answer ] unit-test
|
||||
[ { { t 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } f answer ] unit-test
|
||||
[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>condition ] unit-test
|
||||
|
||||
SYMBOL: foo
|
||||
SYMBOL: bar
|
||||
|
||||
[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { t 1 } { T{ primitive-class f foo } 2 } { T{ primitive-class f bar } 3 } } table>condition ] unit-test
|
||||
|
||||
[ t ] [ foo <primitive-class> dup t replace-question ] unit-test
|
||||
[ f ] [ foo <primitive-class> dup f replace-question ] unit-test
|
||||
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> t replace-question ] unit-test
|
||||
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> f replace-question ] unit-test
|
||||
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> t replace-question ] unit-test
|
||||
[ T{ primitive-class f bar } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> t replace-question ] unit-test
|
||||
[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> f replace-question ] unit-test
|
||||
[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> f replace-question ] unit-test
|
||||
[ t ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> t replace-question ] unit-test
|
||||
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> f replace-question ] unit-test
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel math math.order words combinators locals
|
||||
ascii unicode.categories combinators.short-circuit sequences
|
||||
fry macros arrays ;
|
||||
fry macros arrays assocs sets ;
|
||||
IN: regexp.classes
|
||||
|
||||
SINGLETONS: any-char any-char-no-nl
|
||||
|
@ -208,3 +208,57 @@ M: primitive-class class-member?
|
|||
class>> class-member? ;
|
||||
|
||||
UNION: class primitive-class not-class or-class and-class range ;
|
||||
|
||||
TUPLE: condition question yes no ;
|
||||
C: <condition> condition
|
||||
|
||||
GENERIC# replace-question 2 ( class from to -- new-class )
|
||||
|
||||
M:: object replace-question ( class from to -- new-class )
|
||||
class from = to class ? ;
|
||||
|
||||
: replace-compound ( class from to -- seq )
|
||||
[ seq>> ] 2dip '[ _ _ replace-question ] map ;
|
||||
|
||||
M: and-class replace-question
|
||||
replace-compound <and-class> ;
|
||||
|
||||
M: or-class replace-question
|
||||
replace-compound <or-class> ;
|
||||
|
||||
M: not-class replace-question
|
||||
class>> replace-question <not-class> ;
|
||||
|
||||
: answer ( table question answer -- new-table )
|
||||
'[ [ _ _ replace-question ] dip ] assoc-map
|
||||
[ drop ] assoc-filter ;
|
||||
|
||||
DEFER: make-condition
|
||||
|
||||
: (make-condition) ( table questions question -- condition )
|
||||
[ 2nip ]
|
||||
[ swap [ t answer ] dip make-condition ]
|
||||
[ swap [ f answer ] dip make-condition ] 3tri
|
||||
2dup = [ 2nip ] [ <condition> ] if ;
|
||||
|
||||
: make-condition ( table questions -- condition )
|
||||
[ values ] [ unclip (make-condition) ] if-empty ;
|
||||
|
||||
GENERIC: class>questions ( class -- questions )
|
||||
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
|
||||
M: or-class class>questions compound-questions ;
|
||||
M: and-class class>questions compound-questions ;
|
||||
M: not-class class>questions class>> class>questions ;
|
||||
M: object class>questions 1array ;
|
||||
|
||||
: table>questions ( table -- questions )
|
||||
keys <and-class> class>questions t swap remove ;
|
||||
|
||||
: table>condition ( table -- condition )
|
||||
>alist dup table>questions make-condition ;
|
||||
|
||||
: condition-map ( condition quot: ( obj -- obj' ) -- new-condition )
|
||||
over condition? [
|
||||
[ [ question>> ] [ yes>> ] [ no>> ] tri ] dip
|
||||
'[ _ condition-map ] bi@ <condition>
|
||||
] [ call ] if ; inline recursive
|
||||
|
|
|
@ -9,9 +9,17 @@ IN: regexp.compiler
|
|||
: literals>cases ( literal-transitions -- case-body )
|
||||
[ 1quotation ] assoc-map ;
|
||||
|
||||
: condition>quot ( condition -- quot )
|
||||
dup condition? [
|
||||
[ question>> ] [ yes>> ] [ no>> ] tri
|
||||
[ condition>quot ] bi@
|
||||
'[ dup _ class-member? _ _ if ]
|
||||
] [
|
||||
[ [ 3drop ] ] [ '[ drop _ execute ] ] if-empty
|
||||
] if ;
|
||||
|
||||
: non-literals>dispatch ( non-literal-transitions -- quot )
|
||||
[ [ '[ dup _ class-member? ] ] [ '[ drop _ execute ] ] bi* ] assoc-map
|
||||
[ 3drop ] suffix '[ _ cond ] ;
|
||||
table>condition condition>quot ;
|
||||
|
||||
: expand-one-or ( or-class transition -- alist )
|
||||
[ seq>> ] dip '[ _ 2array ] map ;
|
||||
|
@ -36,7 +44,7 @@ IN: regexp.compiler
|
|||
|
||||
: transitions>quot ( transitions final-state? -- quot )
|
||||
[ split-literals suffix ] dip
|
||||
'[ { array-capacity string } declare _ _ step ] ;
|
||||
'[ { array-capacity sequence } declare _ _ step ] ;
|
||||
|
||||
: word>quot ( word dfa -- quot )
|
||||
[ transitions>> at ]
|
||||
|
@ -67,11 +75,12 @@ IN: regexp.compiler
|
|||
: dfa>word ( dfa -- word )
|
||||
states>words [ states>code ] keep start-state>> ;
|
||||
|
||||
: check-string ( string -- string )
|
||||
dup string? [ "String required" throw ] unless ;
|
||||
: check-sequence ( string -- string )
|
||||
! Make this configurable
|
||||
dup sequence? [ "String required" throw ] unless ;
|
||||
|
||||
: run-regexp ( start-index string word -- ? )
|
||||
{ [ f ] [ >fixnum ] [ check-string ] [ execute ] } spread ; inline
|
||||
{ [ f ] [ >fixnum ] [ check-sequence ] [ execute ] } spread ; inline
|
||||
|
||||
: dfa>quotation ( dfa -- quot )
|
||||
dfa>word '[ _ run-regexp ] ;
|
||||
|
|
|
@ -8,9 +8,6 @@ IN: regexp.dfa
|
|||
: find-delta ( states transition nfa -- new-states )
|
||||
transitions>> '[ _ swap _ at at ] gather sift ;
|
||||
|
||||
TUPLE: condition question yes no ;
|
||||
C: <condition> condition
|
||||
|
||||
:: epsilon-loop ( state table nfa question -- )
|
||||
state table at :> old-value
|
||||
old-value question 2array <or-class> :> new-question
|
||||
|
@ -27,53 +24,12 @@ C: <condition> condition
|
|||
] assoc-each
|
||||
] unless ;
|
||||
|
||||
GENERIC# replace-question 2 ( class from to -- new-class )
|
||||
|
||||
M: object replace-question
|
||||
[ [ = ] keep ] dip swap ? ;
|
||||
|
||||
: replace-compound ( class from to -- seq )
|
||||
[ seq>> ] 2dip '[ _ _ replace-question ] map ;
|
||||
|
||||
M: and-class replace-question
|
||||
replace-compound <and-class> ;
|
||||
|
||||
M: or-class replace-question
|
||||
replace-compound <or-class> ;
|
||||
|
||||
: answer ( table question answer -- new-table )
|
||||
'[ _ _ replace-question ] assoc-map
|
||||
[ nip ] assoc-filter ;
|
||||
|
||||
DEFER: make-condition
|
||||
|
||||
: (make-condition) ( table questions question -- condition )
|
||||
[ 2nip ]
|
||||
[ swap [ t answer ] dip make-condition ]
|
||||
[ swap [ f answer ] dip make-condition ] 3tri
|
||||
<condition> ;
|
||||
|
||||
: make-condition ( table questions -- condition )
|
||||
[ keys ] [ unclip (make-condition) ] if-empty ;
|
||||
|
||||
GENERIC: class>questions ( class -- questions )
|
||||
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
|
||||
M: or-class class>questions compound-questions ;
|
||||
M: and-class class>questions compound-questions ;
|
||||
M: object class>questions 1array ;
|
||||
|
||||
: table>condition ( table -- condition )
|
||||
! This is wrong, since actually an arbitrary and-class or or-class can be used
|
||||
dup
|
||||
values <or-class> class>questions t swap remove
|
||||
make-condition ;
|
||||
|
||||
: epsilon-table ( states nfa -- table )
|
||||
[ H{ } clone tuck ] dip
|
||||
'[ _ _ t epsilon-loop ] each ;
|
||||
|
||||
: find-epsilon-closure ( states nfa -- dfa-state )
|
||||
epsilon-table table>condition ;
|
||||
epsilon-table [ swap ] assoc-map table>condition ;
|
||||
|
||||
: find-closure ( states transition nfa -- new-states )
|
||||
[ find-delta ] keep find-epsilon-closure ;
|
||||
|
|
|
@ -2,13 +2,13 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel sequences regexp.transition-tables fry assocs
|
||||
accessors locals math sorting arrays sets hashtables regexp.dfa
|
||||
combinators.short-circuit ;
|
||||
combinators.short-circuit regexp.classes ;
|
||||
IN: regexp.minimize
|
||||
|
||||
: number-transitions ( transitions numbering -- new-transitions )
|
||||
dup '[
|
||||
[ _ at ]
|
||||
[ [ _ at ] assoc-map ] bi*
|
||||
[ [ [ _ at ] condition-map ] assoc-map ] bi*
|
||||
] assoc-map ;
|
||||
|
||||
: table>state-numbers ( table -- assoc )
|
||||
|
@ -29,6 +29,9 @@ IN: regexp.minimize
|
|||
dup table>state-numbers
|
||||
[ number-transitions ] rewrite-transitions ;
|
||||
|
||||
: no-conditions? ( state transition-table -- ? )
|
||||
transitions>> at values [ condition? ] any? not ;
|
||||
|
||||
: initially-same? ( s1 s2 transition-table -- ? )
|
||||
{
|
||||
[ drop <= ]
|
||||
|
@ -39,7 +42,8 @@ IN: regexp.minimize
|
|||
:: initialize-partitions ( transition-table -- partitions )
|
||||
! Partition table is sorted-array => ?
|
||||
H{ } clone :> out
|
||||
transition-table transitions>> keys :> states
|
||||
transition-table transitions>> keys
|
||||
[ transition-table no-conditions? ] filter :> states
|
||||
states [| s1 |
|
||||
states [| s2 |
|
||||
s1 s2 transition-table initially-same?
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
USING: regexp tools.test kernel sequences regexp.parser regexp.private
|
||||
regexp.traversal eval strings multiline accessors regexp.matchers ;
|
||||
eval strings multiline accessors regexp.matchers ;
|
||||
IN: regexp-tests
|
||||
|
||||
\ <regexp> must-infer
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
USING: accessors combinators kernel math sequences strings sets
|
||||
assocs prettyprint.backend prettyprint.custom make lexer
|
||||
namespaces parser arrays fry locals regexp.minimize
|
||||
regexp.parser regexp.nfa regexp.dfa regexp.traversal
|
||||
regexp.parser regexp.nfa regexp.dfa
|
||||
regexp.transition-tables splitting sorting regexp.ast
|
||||
regexp.negation regexp.matchers regexp.compiler ;
|
||||
IN: regexp
|
||||
|
@ -12,16 +12,16 @@ TUPLE: regexp
|
|||
{ raw read-only }
|
||||
{ parse-tree read-only }
|
||||
{ options read-only }
|
||||
dfa reverse-dfa dfa-quot ;
|
||||
dfa reverse-dfa ;
|
||||
|
||||
: make-regexp ( string ast -- regexp )
|
||||
f f <options> f f f regexp boa ; foldable
|
||||
f f <options> f f regexp boa ; foldable
|
||||
! Foldable because, when the dfa slot is set,
|
||||
! it'll be set to the same thing regardless of who sets it
|
||||
|
||||
: <optioned-regexp> ( string options -- regexp )
|
||||
[ dup parse-regexp ] [ string>options ] bi*
|
||||
f f f regexp boa ;
|
||||
f f regexp boa ;
|
||||
|
||||
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;
|
||||
|
||||
|
@ -34,26 +34,25 @@ C: <reverse-matcher> reverse-matcher
|
|||
[ parse-tree>> ] [ options>> ] bi <with-options> ;
|
||||
|
||||
: compile-regexp ( regexp -- regexp )
|
||||
dup '[ [ _ get-ast ast>dfa ] unless* ] change-dfa ;
|
||||
|
||||
: compile-dfa-quot ( regexp -- regexp )
|
||||
dup '[ [ _ compile-regexp dfa>> dfa>quotation ] unless* ] change-dfa-quot ;
|
||||
dup '[ [ _ get-ast ast>dfa dfa>quotation ] unless* ] change-dfa ;
|
||||
|
||||
: <reversed-option> ( ast -- reversed )
|
||||
"r" string>options <with-options> ;
|
||||
|
||||
: compile-reverse ( regexp -- regexp )
|
||||
dup '[ [ _ get-ast <reversed-option> ast>dfa ] unless* ] change-reverse-dfa ;
|
||||
dup '[
|
||||
[
|
||||
_ get-ast <reversed-option>
|
||||
ast>dfa dfa>quotation
|
||||
] unless*
|
||||
] change-reverse-dfa ;
|
||||
|
||||
M: regexp match-index-from ( string regexp -- index/f )
|
||||
dup dfa-quot>>
|
||||
[ <quot-matcher> ]
|
||||
[ compile-regexp dfa>> <dfa-matcher> ] ?if
|
||||
match-index-from ;
|
||||
compile-regexp dfa-quot>> <quot-matcher> match-index-from ;
|
||||
|
||||
M: reverse-matcher match-index-from ( string regexp -- index/f )
|
||||
[ <reversed> ] [ regexp>> compile-reverse reverse-dfa>> ] bi*
|
||||
<dfa-traverser> do-match match-index>> ;
|
||||
<quot-matcher> match-index-from ;
|
||||
|
||||
: find-regexp-syntax ( string -- prefix suffix )
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue