Beginnings of lookahead and lookbehind
parent
1740b85598
commit
eb231df4e7
|
@ -16,7 +16,7 @@ C: <at-least> at-least
|
||||||
TUPLE: tagged-epsilon tag ;
|
TUPLE: tagged-epsilon tag ;
|
||||||
C: <tagged-epsilon> tagged-epsilon
|
C: <tagged-epsilon> tagged-epsilon
|
||||||
|
|
||||||
CONSTANT: epsilon T{ tagged-epsilon }
|
CONSTANT: epsilon T{ tagged-epsilon { tag t } }
|
||||||
|
|
||||||
TUPLE: concatenation first second ;
|
TUPLE: concatenation first second ;
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ IN: regexp.classes.tests
|
||||||
[ 1 ] [ 1 <not-class> <not-class> ] unit-test
|
[ 1 ] [ 1 <not-class> <not-class> ] unit-test
|
||||||
[ 1 ] [ { 1 1 } <and-class> ] unit-test
|
[ 1 ] [ { 1 1 } <and-class> ] unit-test
|
||||||
[ 1 ] [ { 1 1 } <or-class> ] unit-test
|
[ 1 ] [ { 1 1 } <or-class> ] unit-test
|
||||||
|
[ t ] [ { t t } <or-class> ] unit-test
|
||||||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
|
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
|
||||||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
|
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
|
||||||
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
|
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
|
||||||
|
|
|
@ -140,7 +140,7 @@ GENERIC: combine-or ( class1 class2 -- combined ? )
|
||||||
M: object combine-or replace-if-= ;
|
M: object combine-or replace-if-= ;
|
||||||
|
|
||||||
M: t combine-or
|
M: t combine-or
|
||||||
drop f ;
|
nip t ;
|
||||||
|
|
||||||
M: f combine-or
|
M: f combine-or
|
||||||
drop t ;
|
drop t ;
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
USING: regexp.dfa tools.test ;
|
USING: regexp.dfa tools.test ;
|
||||||
IN: regexp.dfa.tests
|
IN: regexp.dfa.tests
|
||||||
|
|
||||||
[ [ ] [ ] while-changes ] must-infer
|
|
||||||
|
|
||||||
|
|
|
@ -2,35 +2,84 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs combinators fry kernel locals
|
USING: accessors arrays assocs combinators fry kernel locals
|
||||||
math math.order regexp.nfa regexp.transition-tables sequences
|
math math.order regexp.nfa regexp.transition-tables sequences
|
||||||
sets sorting vectors regexp.ast ;
|
sets sorting vectors regexp.ast regexp.classes ;
|
||||||
IN: regexp.dfa
|
IN: regexp.dfa
|
||||||
|
|
||||||
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
|
|
||||||
obj quot call :> new-obj
|
|
||||||
new-obj comp call :> new-key
|
|
||||||
new-key old-key =
|
|
||||||
[ new-obj ]
|
|
||||||
[ new-obj quot comp new-key (while-changes) ]
|
|
||||||
if ; inline recursive
|
|
||||||
|
|
||||||
: while-changes ( obj quot pred -- obj' )
|
|
||||||
3dup nip call (while-changes) ; inline
|
|
||||||
|
|
||||||
: find-delta ( states transition nfa -- new-states )
|
: find-delta ( states transition nfa -- new-states )
|
||||||
transitions>> '[ _ swap _ at at ] gather sift ;
|
transitions>> '[ _ swap _ at at ] gather sift ;
|
||||||
|
|
||||||
: (find-epsilon-closure) ( states nfa -- new-states )
|
TUPLE: condition question yes no ;
|
||||||
epsilon swap find-delta ;
|
C: <condition> condition
|
||||||
|
|
||||||
: find-epsilon-closure ( states nfa -- new-states )
|
:: epsilon-loop ( state table nfa question -- )
|
||||||
'[ dup _ (find-epsilon-closure) union ] [ length ] while-changes
|
state table at :> old-value
|
||||||
natural-sort ;
|
old-value question 2array <or-class> :> new-question
|
||||||
|
new-question old-value = [
|
||||||
|
new-question state table set-at
|
||||||
|
state nfa transitions>> at
|
||||||
|
[ drop tagged-epsilon? ] assoc-filter
|
||||||
|
[| trans to |
|
||||||
|
to [
|
||||||
|
table nfa
|
||||||
|
trans tag>> new-question 2array <and-class>
|
||||||
|
epsilon-loop
|
||||||
|
] each
|
||||||
|
] assoc-each
|
||||||
|
] unless ;
|
||||||
|
|
||||||
|
GENERIC# replace-question 2 ( class from to -- new-class )
|
||||||
|
|
||||||
|
M: object replace-question
|
||||||
|
[ [ = ] keep ] dip swap ? ;
|
||||||
|
|
||||||
|
: replace-compound ( class from to -- seq )
|
||||||
|
[ seq>> ] 2dip '[ _ _ replace-question ] map ;
|
||||||
|
|
||||||
|
M: and-class replace-question
|
||||||
|
replace-compound <and-class> ;
|
||||||
|
|
||||||
|
M: or-class replace-question
|
||||||
|
replace-compound <or-class> ;
|
||||||
|
|
||||||
|
: answer ( table question answer -- new-table )
|
||||||
|
'[ _ _ replace-question ] assoc-map
|
||||||
|
[ nip ] assoc-filter ;
|
||||||
|
|
||||||
|
DEFER: make-condition
|
||||||
|
|
||||||
|
: (make-condition) ( table questions question -- condition )
|
||||||
|
[ 2nip ]
|
||||||
|
[ swap [ t answer ] dip make-condition ]
|
||||||
|
[ swap [ f answer ] dip make-condition ] 3tri
|
||||||
|
<condition> ;
|
||||||
|
|
||||||
|
: make-condition ( table questions -- condition )
|
||||||
|
[ keys ] [ unclip (make-condition) ] if-empty ;
|
||||||
|
|
||||||
|
GENERIC: class>questions ( class -- questions )
|
||||||
|
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
|
||||||
|
M: or-class class>questions compound-questions ;
|
||||||
|
M: and-class class>questions compound-questions ;
|
||||||
|
M: object class>questions 1array ;
|
||||||
|
|
||||||
|
: table>condition ( table -- condition )
|
||||||
|
! This is wrong, since actually an arbitrary and-class or or-class can be used
|
||||||
|
dup
|
||||||
|
values <or-class> class>questions t swap remove
|
||||||
|
make-condition ;
|
||||||
|
|
||||||
|
: epsilon-table ( states nfa -- table )
|
||||||
|
[ H{ } clone tuck ] dip
|
||||||
|
'[ _ _ t epsilon-loop ] each ;
|
||||||
|
|
||||||
|
: find-epsilon-closure ( states nfa -- dfa-state )
|
||||||
|
epsilon-table table>condition ;
|
||||||
|
|
||||||
: find-closure ( states transition nfa -- new-states )
|
: find-closure ( states transition nfa -- new-states )
|
||||||
[ find-delta ] keep find-epsilon-closure ;
|
[ find-delta ] keep find-epsilon-closure ;
|
||||||
|
|
||||||
: find-start-state ( nfa -- state )
|
: find-start-state ( nfa -- state )
|
||||||
[ start-state>> 1vector ] keep find-epsilon-closure ;
|
[ start-state>> 1array ] keep find-epsilon-closure ;
|
||||||
|
|
||||||
: find-transitions ( dfa-state nfa -- next-dfa-state )
|
: find-transitions ( dfa-state nfa -- next-dfa-state )
|
||||||
transitions>>
|
transitions>>
|
||||||
|
@ -49,7 +98,7 @@ IN: regexp.dfa
|
||||||
[| trans |
|
[| trans |
|
||||||
state trans nfa find-closure :> new-state
|
state trans nfa find-closure :> new-state
|
||||||
new-state visited-states new-states add-todo-state
|
new-state visited-states new-states add-todo-state
|
||||||
state new-state trans dfa add-transition
|
state new-state trans dfa set-transition
|
||||||
] each
|
] each
|
||||||
nfa dfa new-states visited-states new-transitions
|
nfa dfa new-states visited-states new-transitions
|
||||||
] if-empty ;
|
] if-empty ;
|
||||||
|
|
|
@ -47,3 +47,5 @@ IN: regexp.minimize.tests
|
||||||
{ final-states H{ { 3 3 } { 6 6 } } }
|
{ final-states H{ { 3 3 } { 6 6 } } }
|
||||||
} combine-states
|
} combine-states
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
[ [ ] [ ] while-changes ] must-infer
|
||||||
|
|
|
@ -8,7 +8,7 @@ IN: regexp.minimize
|
||||||
: number-transitions ( transitions numbering -- new-transitions )
|
: number-transitions ( transitions numbering -- new-transitions )
|
||||||
dup '[
|
dup '[
|
||||||
[ _ at ]
|
[ _ at ]
|
||||||
[ [ first _ at ] assoc-map ] bi*
|
[ [ _ at ] assoc-map ] bi*
|
||||||
] assoc-map ;
|
] assoc-map ;
|
||||||
|
|
||||||
: table>state-numbers ( table -- assoc )
|
: table>state-numbers ( table -- assoc )
|
||||||
|
@ -66,6 +66,17 @@ IN: regexp.minimize
|
||||||
<reversed>
|
<reversed>
|
||||||
>hashtable ;
|
>hashtable ;
|
||||||
|
|
||||||
|
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
|
||||||
|
obj quot call :> new-obj
|
||||||
|
new-obj comp call :> new-key
|
||||||
|
new-key old-key =
|
||||||
|
[ new-obj ]
|
||||||
|
[ new-obj quot comp new-key (while-changes) ]
|
||||||
|
if ; inline recursive
|
||||||
|
|
||||||
|
: while-changes ( obj quot pred -- obj' )
|
||||||
|
3dup nip call (while-changes) ; inline
|
||||||
|
|
||||||
: state-classes ( transition-table -- synonyms )
|
: state-classes ( transition-table -- synonyms )
|
||||||
[ initialize-partitions ] keep
|
[ initialize-partitions ] keep
|
||||||
'[ _ partition-more ] [ assoc-size ] while-changes
|
'[ _ partition-more ] [ assoc-size ] while-changes
|
||||||
|
|
|
@ -55,8 +55,12 @@ M:: star nfa-node ( node -- start end )
|
||||||
s1 s3 epsilon-transition
|
s1 s3 epsilon-transition
|
||||||
s2 s3 ;
|
s2 s3 ;
|
||||||
|
|
||||||
|
GENERIC: modify-epsilon ( tag -- newtag )
|
||||||
|
|
||||||
|
M: object modify-epsilon ;
|
||||||
|
|
||||||
M: tagged-epsilon nfa-node
|
M: tagged-epsilon nfa-node
|
||||||
add-simple-entry ;
|
clone [ modify-epsilon ] change-tag add-simple-entry ;
|
||||||
|
|
||||||
M: concatenation nfa-node ( node -- start end )
|
M: concatenation nfa-node ( node -- start end )
|
||||||
[ first>> ] [ second>> ] bi
|
[ first>> ] [ second>> ] bi
|
||||||
|
|
|
@ -137,10 +137,10 @@ Parenthized = "?:" Alternation:a => [[ a ]]
|
||||||
=> [[ a on off parse-options <with-options> ]]
|
=> [[ a on off parse-options <with-options> ]]
|
||||||
| "?#" [^)]* => [[ f ]]
|
| "?#" [^)]* => [[ f ]]
|
||||||
| "?~" Alternation:a => [[ a <negation> ]]
|
| "?~" Alternation:a => [[ a <negation> ]]
|
||||||
| "?=" Alternation:a => [[ a <lookahead> ]]
|
| "?=" Alternation:a => [[ a <lookahead> <tagged-epsilon> ]]
|
||||||
| "?!" Alternation:a => [[ a <negation> <lookahead> ]]
|
| "?!" Alternation:a => [[ a <negation> <lookahead> <tagged-epsilon> ]]
|
||||||
| "?<=" Alternation:a => [[ a <lookbehind> ]]
|
| "?<=" Alternation:a => [[ a <lookbehind> <tagged-epsilon> ]]
|
||||||
| "?<!" Alternation:a => [[ a <negation> <lookbehind> ]]
|
| "?<!" Alternation:a => [[ a <negation> <lookbehind> <tagged-epsilon> ]]
|
||||||
| Alternation
|
| Alternation
|
||||||
|
|
||||||
Element = "(" Parenthized:p ")" => [[ p ]]
|
Element = "(" Parenthized:p ")" => [[ p ]]
|
||||||
|
|
|
@ -14,11 +14,20 @@ TUPLE: transition-table transitions start-state final-states ;
|
||||||
: maybe-initialize-key ( key hashtable -- )
|
: maybe-initialize-key ( key hashtable -- )
|
||||||
2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ;
|
2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ;
|
||||||
|
|
||||||
:: set-transition ( from to obj hash -- )
|
:: (set-transition) ( from to obj hash -- )
|
||||||
|
to hash maybe-initialize-key
|
||||||
|
from hash at
|
||||||
|
[ [ to obj ] dip set-at ]
|
||||||
|
[ to obj associate from hash set-at ] if* ;
|
||||||
|
|
||||||
|
: set-transition ( from to obj transition-table -- )
|
||||||
|
transitions>> (set-transition) ;
|
||||||
|
|
||||||
|
:: (add-transition) ( from to obj hash -- )
|
||||||
to hash maybe-initialize-key
|
to hash maybe-initialize-key
|
||||||
from hash at
|
from hash at
|
||||||
[ [ to obj ] dip push-at ]
|
[ [ to obj ] dip push-at ]
|
||||||
[ to 1vector obj associate from hash set-at ] if* ;
|
[ to 1vector obj associate from hash set-at ] if* ;
|
||||||
|
|
||||||
: add-transition ( from to obj transition-table -- )
|
: add-transition ( from to obj transition-table -- )
|
||||||
transitions>> set-transition ;
|
transitions>> (add-transition) ;
|
||||||
|
|
Loading…
Reference in New Issue