Beginnings of lookahead and lookbehind

db4
Daniel Ehrenberg 2009-03-04 00:36:03 -06:00
parent 1740b85598
commit eb231df4e7
10 changed files with 105 additions and 31 deletions

View File

@ -16,7 +16,7 @@ C: <at-least> at-least
TUPLE: tagged-epsilon tag ;
C: <tagged-epsilon> tagged-epsilon
CONSTANT: epsilon T{ tagged-epsilon }
CONSTANT: epsilon T{ tagged-epsilon { tag t } }
TUPLE: concatenation first second ;

View File

@ -21,6 +21,7 @@ IN: regexp.classes.tests
[ 1 ] [ 1 <not-class> <not-class> ] unit-test
[ 1 ] [ { 1 1 } <and-class> ] unit-test
[ 1 ] [ { 1 1 } <or-class> ] unit-test
[ t ] [ { t t } <or-class> ] unit-test
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test

View File

@ -140,7 +140,7 @@ GENERIC: combine-or ( class1 class2 -- combined ? )
M: object combine-or replace-if-= ;
M: t combine-or
drop f ;
nip t ;
M: f combine-or
drop t ;

View File

@ -1,5 +1,3 @@
USING: regexp.dfa tools.test ;
IN: regexp.dfa.tests
[ [ ] [ ] while-changes ] must-infer

View File

@ -2,35 +2,84 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators fry kernel locals
math math.order regexp.nfa regexp.transition-tables sequences
sets sorting vectors regexp.ast ;
sets sorting vectors regexp.ast regexp.classes ;
IN: regexp.dfa
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
obj quot call :> new-obj
new-obj comp call :> new-key
new-key old-key =
[ new-obj ]
[ new-obj quot comp new-key (while-changes) ]
if ; inline recursive
: while-changes ( obj quot pred -- obj' )
3dup nip call (while-changes) ; inline
: find-delta ( states transition nfa -- new-states )
transitions>> '[ _ swap _ at at ] gather sift ;
: (find-epsilon-closure) ( states nfa -- new-states )
epsilon swap find-delta ;
TUPLE: condition question yes no ;
C: <condition> condition
: find-epsilon-closure ( states nfa -- new-states )
'[ dup _ (find-epsilon-closure) union ] [ length ] while-changes
natural-sort ;
:: epsilon-loop ( state table nfa question -- )
state table at :> old-value
old-value question 2array <or-class> :> new-question
new-question old-value = [
new-question state table set-at
state nfa transitions>> at
[ drop tagged-epsilon? ] assoc-filter
[| trans to |
to [
table nfa
trans tag>> new-question 2array <and-class>
epsilon-loop
] each
] assoc-each
] unless ;
GENERIC# replace-question 2 ( class from to -- new-class )
M: object replace-question
[ [ = ] keep ] dip swap ? ;
: replace-compound ( class from to -- seq )
[ seq>> ] 2dip '[ _ _ replace-question ] map ;
M: and-class replace-question
replace-compound <and-class> ;
M: or-class replace-question
replace-compound <or-class> ;
: answer ( table question answer -- new-table )
'[ _ _ replace-question ] assoc-map
[ nip ] assoc-filter ;
DEFER: make-condition
: (make-condition) ( table questions question -- condition )
[ 2nip ]
[ swap [ t answer ] dip make-condition ]
[ swap [ f answer ] dip make-condition ] 3tri
<condition> ;
: make-condition ( table questions -- condition )
[ keys ] [ unclip (make-condition) ] if-empty ;
GENERIC: class>questions ( class -- questions )
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
M: or-class class>questions compound-questions ;
M: and-class class>questions compound-questions ;
M: object class>questions 1array ;
: table>condition ( table -- condition )
! This is wrong, since actually an arbitrary and-class or or-class can be used
dup
values <or-class> class>questions t swap remove
make-condition ;
: epsilon-table ( states nfa -- table )
[ H{ } clone tuck ] dip
'[ _ _ t epsilon-loop ] each ;
: find-epsilon-closure ( states nfa -- dfa-state )
epsilon-table table>condition ;
: find-closure ( states transition nfa -- new-states )
[ find-delta ] keep find-epsilon-closure ;
: find-start-state ( nfa -- state )
[ start-state>> 1vector ] keep find-epsilon-closure ;
[ start-state>> 1array ] keep find-epsilon-closure ;
: find-transitions ( dfa-state nfa -- next-dfa-state )
transitions>>
@ -49,7 +98,7 @@ IN: regexp.dfa
[| trans |
state trans nfa find-closure :> new-state
new-state visited-states new-states add-todo-state
state new-state trans dfa add-transition
state new-state trans dfa set-transition
] each
nfa dfa new-states visited-states new-transitions
] if-empty ;

View File

@ -47,3 +47,5 @@ IN: regexp.minimize.tests
{ final-states H{ { 3 3 } { 6 6 } } }
} combine-states
] unit-test
[ [ ] [ ] while-changes ] must-infer

View File

@ -8,7 +8,7 @@ IN: regexp.minimize
: number-transitions ( transitions numbering -- new-transitions )
dup '[
[ _ at ]
[ [ first _ at ] assoc-map ] bi*
[ [ _ at ] assoc-map ] bi*
] assoc-map ;
: table>state-numbers ( table -- assoc )
@ -66,6 +66,17 @@ IN: regexp.minimize
<reversed>
>hashtable ;
:: (while-changes) ( obj quot: ( obj -- obj' ) comp: ( obj -- key ) old-key -- obj )
obj quot call :> new-obj
new-obj comp call :> new-key
new-key old-key =
[ new-obj ]
[ new-obj quot comp new-key (while-changes) ]
if ; inline recursive
: while-changes ( obj quot pred -- obj' )
3dup nip call (while-changes) ; inline
: state-classes ( transition-table -- synonyms )
[ initialize-partitions ] keep
'[ _ partition-more ] [ assoc-size ] while-changes

View File

@ -55,8 +55,12 @@ M:: star nfa-node ( node -- start end )
s1 s3 epsilon-transition
s2 s3 ;
GENERIC: modify-epsilon ( tag -- newtag )
M: object modify-epsilon ;
M: tagged-epsilon nfa-node
add-simple-entry ;
clone [ modify-epsilon ] change-tag add-simple-entry ;
M: concatenation nfa-node ( node -- start end )
[ first>> ] [ second>> ] bi

View File

@ -137,10 +137,10 @@ Parenthized = "?:" Alternation:a => [[ a ]]
=> [[ a on off parse-options <with-options> ]]
| "?#" [^)]* => [[ f ]]
| "?~" Alternation:a => [[ a <negation> ]]
| "?=" Alternation:a => [[ a <lookahead> ]]
| "?!" Alternation:a => [[ a <negation> <lookahead> ]]
| "?<=" Alternation:a => [[ a <lookbehind> ]]
| "?<!" Alternation:a => [[ a <negation> <lookbehind> ]]
| "?=" Alternation:a => [[ a <lookahead> <tagged-epsilon> ]]
| "?!" Alternation:a => [[ a <negation> <lookahead> <tagged-epsilon> ]]
| "?<=" Alternation:a => [[ a <lookbehind> <tagged-epsilon> ]]
| "?<!" Alternation:a => [[ a <negation> <lookbehind> <tagged-epsilon> ]]
| Alternation
Element = "(" Parenthized:p ")" => [[ p ]]

View File

@ -14,11 +14,20 @@ TUPLE: transition-table transitions start-state final-states ;
: maybe-initialize-key ( key hashtable -- )
2dup key? [ 2drop ] [ [ H{ } clone ] 2dip set-at ] if ;
:: set-transition ( from to obj hash -- )
:: (set-transition) ( from to obj hash -- )
to hash maybe-initialize-key
from hash at
[ [ to obj ] dip set-at ]
[ to obj associate from hash set-at ] if* ;
: set-transition ( from to obj transition-table -- )
transitions>> (set-transition) ;
:: (add-transition) ( from to obj hash -- )
to hash maybe-initialize-key
from hash at
[ [ to obj ] dip push-at ]
[ to 1vector obj associate from hash set-at ] if* ;
: add-transition ( from to obj transition-table -- )
transitions>> set-transition ;
transitions>> (add-transition) ;