Beginnings of lookahead and lookbehind
parent
1740b85598
commit
eb231df4e7
basis/regexp
ast
nfa
parser
transition-tables
|
@ -16,7 +16,7 @@ C: <at-least> at-least
|
|||
TUPLE: tagged-epsilon tag ;
|
||||
C: <tagged-epsilon> tagged-epsilon
|
||||
|
||||
CONSTANT: epsilon T{ tagged-epsilon }
|
||||
CONSTANT: epsilon T{ tagged-epsilon { tag t } }
|
||||
|
||||
TUPLE: concatenation first second ;
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ IN: regexp.classes.tests
|
|||
[ 1 ] [ 1 <not-class> <not-class> ] unit-test
|
||||
[ 1 ] [ { 1 1 } <and-class> ] unit-test
|
||||
[ 1 ] [ { 1 1 } <or-class> ] unit-test
|
||||
[ t ] [ { t t } <or-class> ] unit-test
|
||||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
|
||||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
|
||||
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
|
||||
|
|
|
@ -140,7 +140,7 @@ GENERIC: combine-or ( class1 class2 -- combined ? )
|
|||
M: object combine-or replace-if-= ;
|
||||
|
||||
M: t combine-or
|
||||
drop f ;
|
||||
nip t ;
|
||||
|
||||
M: f combine-or
|
||||
drop t ;
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
USING: regexp.dfa tools.test ;
|
||||
IN: regexp.dfa.tests
|
||||
|
||||
[ [ ] [ ] while-changes ] must-infer
|
||||
|
||||
|
|
|
@ -2,35 +2,84 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs combinators fry kernel locals
|
||||
math math.order regexp.nfa regexp.transition-tables sequences
|
||||
sets sorting vectors regexp.ast ;
|
||||
sets sorting vectors regexp.ast regexp.classes ;
|
||||
IN: regexp.dfa
|
||||
|
||||
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
|
||||
obj quot call :> new-obj
|
||||
new-obj comp call :> new-key
|
||||
new-key old-key =
|
||||
[ new-obj ]
|
||||
[ new-obj quot comp new-key (while-changes) ]
|
||||
if ; inline recursive
|
||||
|
||||
: while-changes ( obj quot pred -- obj' )
|
||||
3dup nip call (while-changes) ; inline
|
||||
|
||||
: find-delta ( states transition nfa -- new-states )
|
||||
transitions>> '[ _ swap _ at at ] gather sift ;
|
||||
|
||||
: (find-epsilon-closure) ( states nfa -- new-states )
|
||||
epsilon swap find-delta ;
|
||||
TUPLE: condition question yes no ;
|
||||
C: <condition> condition
|
||||
|
||||
: find-epsilon-closure ( states nfa -- new-states )
|
||||
'[ dup _ (find-epsilon-closure) union ] [ length ] while-changes
|
||||
natural-sort ;
|
||||
:: epsilon-loop ( state table nfa question -- )
|
||||
state table at :> old-value
|
||||
old-value question 2array <or-class> :> new-question
|
||||
new-question old-value = [
|
||||
new-question state table set-at
|
||||
state nfa transitions>> at
|
||||
[ drop tagged-epsilon? ] assoc-filter
|
||||
[| trans to |
|
||||
to [
|
||||
table nfa
|
||||
trans tag>> new-question 2array <and-class>
|
||||
epsilon-loop
|
||||
] each
|
||||
] assoc-each
|
||||
] unless ;
|
||||
|
||||
GENERIC# replace-question 2 ( class from to -- new-class )
|
||||
|
||||
M: object replace-question
|
||||
[ [ = ] keep ] dip swap ? ;
|
||||
|
||||
: replace-compound ( class from to -- seq )
|
||||
[ seq>> ] 2dip '[ _ _ replace-question ] map ;
|
||||
|
||||
M: and-class replace-question
|
||||
replace-compound <and-class> ;
|
||||
|
||||
M: or-class replace-question
|
||||
replace-compound <or-class> ;
|
||||
|
||||
: answer ( table question answer -- new-table )
|
||||
'[ _ _ replace-question ] assoc-map
|
||||
[ nip ] assoc-filter ;
|
||||
|
||||
DEFER: make-condition
|
||||
|
||||
: (make-condition) ( table questions question -- condition )
|
||||
[ 2nip ]
|
||||
[ swap [ t answer ] dip make-condition ]
|
||||
[ swap [ f answer ] dip make-condition ] 3tri
|
||||
<condition> ;
|
||||
|
||||
: make-condition ( table questions -- condition )
|
||||
[ keys ] [ unclip (make-condition) ] if-empty ;
|
||||
|
||||
GENERIC: class>questions ( class -- questions )
|
||||
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
|
||||
M: or-class class>questions compound-questions ;
|
||||
M: and-class class>questions compound-questions ;
|
||||
M: object class>questions 1array ;
|
||||
|
||||
: table>condition ( table -- condition )
|
||||
! This is wrong, since actually an arbitrary and-class or or-class can be used
|
||||
dup
|
||||
values <or-class> class>questions t swap remove
|
||||
make-condition ;
|
||||
|
||||
: epsilon-table ( states nfa -- table )
|
||||
[ H{ } clone tuck ] dip
|
||||
'[ _ _ t epsilon-loop ] each ;
|
||||
|
||||
: find-epsilon-closure ( states nfa -- dfa-state )
|
||||
epsilon-table table>condition ;
|
||||
|
||||
: find-closure ( states transition nfa -- new-states )
|
||||
[ find-delta ] keep find-epsilon-closure ;
|
||||
|
||||
: find-start-state ( nfa -- state )
|
||||
[ start-state>> 1vector ] keep find-epsilon-closure ;
|
||||
[ start-state>> 1array ] keep find-epsilon-closure ;
|
||||
|
||||
: find-transitions ( dfa-state nfa -- next-dfa-state )
|
||||
transitions>>
|
||||
|
@ -49,7 +98,7 @@ IN: regexp.dfa
|
|||
[| trans |
|
||||
state trans nfa find-closure :> new-state
|
||||
new-state visited-states new-states add-todo-state
|
||||
state new-state trans dfa add-transition
|
||||
state new-state trans dfa set-transition
|
||||
] each
|
||||
nfa dfa new-states visited-states new-transitions
|
||||
] if-empty ;
|
||||
|
|
|
@ -47,3 +47,5 @@ IN: regexp.minimize.tests
|
|||
{ final-states H{ { 3 3 } { 6 6 } } }
|
||||
} combine-states
|
||||
] unit-test
|
||||
|
||||
[ [ ] [ ] while-changes ] must-infer
|
||||
|
|
|
@ -8,7 +8,7 @@ IN: regexp.minimize
|
|||
: number-transitions ( transitions numbering -- new-transitions )
|
||||
dup '[
|
||||
[ _ at ]
|
||||
[ [ first _ at ] assoc-map ] bi*
|
||||
[ [ _ at ] assoc-map ] bi*
|
||||
] assoc-map ;
|
||||
|
||||
: table>state-numbers ( table -- assoc )
|
||||
|
@ -66,6 +66,17 @@ IN: regexp.minimize
|
|||
<reversed>
|
||||
>hashtable ;
|
||||
|
||||
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
|
||||
obj quot call :> new-obj
|
||||
new-obj comp call :> new-key
|
||||
new-key old-key =
|
||||
[ new-obj ]
|
||||
[ new-obj quot comp new-key (while-changes) ]
|
||||
if ; inline recursive
|
||||
|
||||
: while-changes ( obj quot pred -- obj' )
|
||||
3dup nip call (while-changes) ; inline
|
||||
|
||||
: state-classes ( transition-table -- synonyms )
|
||||
[ initialize-partitions ] keep
|
||||
'[ _ partition-more ] [ assoc-size ] while-changes
|
||||
|
|
|
@ -55,8 +55,12 @@ M:: star nfa-node ( node -- start end )
|
|||
s1 s3 epsilon-transition
|
||||
s2 s3 ;
|
||||
|
||||
GENERIC: modify-epsilon ( tag -- newtag )
|
||||
|
||||
M: object modify-epsilon ;
|
||||
|
||||
M: tagged-epsilon nfa-node
|
||||
add-simple-entry ;
|
||||
clone [ modify-epsilon ] change-tag add-simple-entry ;
|
||||
|
||||
M: concatenation nfa-node ( node -- start end )
|
||||
[ first>> ] [ second>> ] bi
|
||||
|
|
|
@ -137,10 +137,10 @@ Parenthized = "?:" Alternation:a => [[ a ]]
|
|||
=> [[ a on off parse-options <with-options> ]]
|
||||
| "?#" [^)]* => [[ f ]]
|
||||
| "?~" Alternation:a => [[ a <negation> ]]
|
||||
| "?=" Alternation:a => [[ a <lookahead> ]]
|
||||
| "?!" Alternation:a => [[ a <negation> <lookahead> ]]
|
||||
| "?<=" Alternation:a => [[ a <lookbehind> ]]
|
||||
| "?<!" Alternation:a => [[ a <negation> <lookbehind> ]]
|
||||
| "?=" Alternation:a => [[ a <lookahead> <tagged-epsilon> ]]
|
||||
| "?!" Alternation:a => [[ a <negation> <lookahead> <tagged-epsilon> ]]
|
||||
| "?<=" Alternation:a => [[ a <lookbehind> <tagged-epsilon> ]]
|
||||
| "?<!" Alternation:a => [[ a <negation> <lookbehind> <tagged-epsilon> ]]
|
||||
| Alternation
|
||||
|
||||
Element = "(" Parenthized:p ")" => [[ p ]]
|
||||
|
|
|
@ -14,11 +14,20 @@ TUPLE: transition-table transitions start-state final-states ;
|
|||
: maybe-initialize-key ( key hashtable -- )
|
||||
2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ;
|
||||
|
||||
:: set-transition ( from to obj hash -- )
|
||||
:: (set-transition) ( from to obj hash -- )
|
||||
to hash maybe-initialize-key
|
||||
from hash at
|
||||
[ [ to obj ] dip set-at ]
|
||||
[ to obj associate from hash set-at ] if* ;
|
||||
|
||||
: set-transition ( from to obj transition-table -- )
|
||||
transitions>> (set-transition) ;
|
||||
|
||||
:: (add-transition) ( from to obj hash -- )
|
||||
to hash maybe-initialize-key
|
||||
from hash at
|
||||
[ [ to obj ] dip push-at ]
|
||||
[ to 1vector obj associate from hash set-at ] if* ;
|
||||
|
||||
: add-transition ( from to obj transition-table -- )
|
||||
transitions>> set-transition ;
|
||||
transitions>> (add-transition) ;
|
||||
|
|
Loading…
Reference in New Issue