Unfinished changes for regexp lookaround

db4
Daniel Ehrenberg 2009-03-04 13:22:22 -06:00
parent 4f306518dc
commit ca19a1b728
7 changed files with 119 additions and 70 deletions

View File

@ -3,6 +3,8 @@
USING: regexp.classes tools.test arrays kernel ;
IN: regexp.classes.tests
! Class algebra
[ f ] [ { 1 2 } <and-class> ] unit-test
[ T{ or-class f { 2 1 } } ] [ { 1 2 } <or-class> ] unit-test
[ 3 ] [ { 1 2 } <and-class> 3 2array <or-class> ] unit-test
@ -25,3 +27,28 @@ IN: regexp.classes.tests
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
! Making classes into nested conditionals
[ V{ 1 2 3 4 } ] [ T{ and-class f { 1 T{ not-class f 2 } T{ or-class f { 3 4 } } 2 } } class>questions ] unit-test
[ { 3 } ] [ { { t 3 } } table>condition ] unit-test
[ { T{ primitive-class } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>questions ] unit-test
[ { { t 1 } { t 2 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } t answer ] unit-test
[ { { t 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } f answer ] unit-test
[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>condition ] unit-test
SYMBOL: foo
SYMBOL: bar
[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { t 1 } { T{ primitive-class f foo } 2 } { T{ primitive-class f bar } 3 } } table>condition ] unit-test
[ t ] [ foo <primitive-class> dup t replace-question ] unit-test
[ f ] [ foo <primitive-class> dup f replace-question ] unit-test
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> t replace-question ] unit-test
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> f replace-question ] unit-test
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> t replace-question ] unit-test
[ T{ primitive-class f bar } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> t replace-question ] unit-test
[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> f replace-question ] unit-test
[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> f replace-question ] unit-test
[ t ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> t replace-question ] unit-test
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> f replace-question ] unit-test

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math math.order words combinators locals
ascii unicode.categories combinators.short-circuit sequences
fry macros arrays ;
fry macros arrays assocs sets ;
IN: regexp.classes
SINGLETONS: any-char any-char-no-nl
@ -208,3 +208,57 @@ M: primitive-class class-member?
class>> class-member? ;
UNION: class primitive-class not-class or-class and-class range ;
TUPLE: condition question yes no ;
C: <condition> condition
GENERIC# replace-question 2 ( class from to -- new-class )
M:: object replace-question ( class from to -- new-class )
class from = to class ? ;
: replace-compound ( class from to -- seq )
[ seq>> ] 2dip '[ _ _ replace-question ] map ;
M: and-class replace-question
replace-compound <and-class> ;
M: or-class replace-question
replace-compound <or-class> ;
M: not-class replace-question
class>> replace-question <not-class> ;
: answer ( table question answer -- new-table )
'[ [ _ _ replace-question ] dip ] assoc-map
[ drop ] assoc-filter ;
DEFER: make-condition
: (make-condition) ( table questions question -- condition )
[ 2nip ]
[ swap [ t answer ] dip make-condition ]
[ swap [ f answer ] dip make-condition ] 3tri
2dup = [ 2nip ] [ <condition> ] if ;
: make-condition ( table questions -- condition )
[ values ] [ unclip (make-condition) ] if-empty ;
GENERIC: class>questions ( class -- questions )
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
M: or-class class>questions compound-questions ;
M: and-class class>questions compound-questions ;
M: not-class class>questions class>> class>questions ;
M: object class>questions 1array ;
: table>questions ( table -- questions )
keys <and-class> class>questions t swap remove ;
: table>condition ( table -- condition )
>alist dup table>questions make-condition ;
: condition-map ( condition quot: ( obj -- obj' ) -- new-condition )
over condition? [
[ [ question>> ] [ yes>> ] [ no>> ] tri ] dip
'[ _ condition-map ] bi@ <condition>
] [ call ] if ; inline recursive

View File

@ -9,9 +9,17 @@ IN: regexp.compiler
: literals>cases ( literal-transitions -- case-body )
[ 1quotation ] assoc-map ;
: condition>quot ( condition -- quot )
dup condition? [
[ question>> ] [ yes>> ] [ no>> ] tri
[ condition>quot ] bi@
'[ dup _ class-member? _ _ if ]
] [
[ [ 3drop ] ] [ '[ drop _ execute ] ] if-empty
] if ;
: non-literals>dispatch ( non-literal-transitions -- quot )
[ [ '[ dup _ class-member? ] ] [ '[ drop _ execute ] ] bi* ] assoc-map
[ 3drop ] suffix '[ _ cond ] ;
table>condition condition>quot ;
: expand-one-or ( or-class transition -- alist )
[ seq>> ] dip '[ _ 2array ] map ;
@ -36,7 +44,7 @@ IN: regexp.compiler
: transitions>quot ( transitions final-state? -- quot )
[ split-literals suffix ] dip
'[ { array-capacity string } declare _ _ step ] ;
'[ { array-capacity sequence } declare _ _ step ] ;
: word>quot ( word dfa -- quot )
[ transitions>> at ]
@ -67,11 +75,12 @@ IN: regexp.compiler
: dfa>word ( dfa -- word )
states>words [ states>code ] keep start-state>> ;
: check-string ( string -- string )
dup string? [ "String required" throw ] unless ;
: check-sequence ( string -- string )
! Make this configurable
dup sequence? [ "String required" throw ] unless ;
: run-regexp ( start-index string word -- ? )
{ [ f ] [ >fixnum ] [ check-string ] [ execute ] } spread ; inline
{ [ f ] [ >fixnum ] [ check-sequence ] [ execute ] } spread ; inline
: dfa>quotation ( dfa -- quot )
dfa>word '[ _ run-regexp ] ;

View File

@ -8,9 +8,6 @@ IN: regexp.dfa
: find-delta ( states transition nfa -- new-states )
transitions>> '[ _ swap _ at at ] gather sift ;
TUPLE: condition question yes no ;
C: <condition> condition
:: epsilon-loop ( state table nfa question -- )
state table at :> old-value
old-value question 2array <or-class> :> new-question
@ -27,53 +24,12 @@ C: <condition> condition
] assoc-each
] unless ;
GENERIC# replace-question 2 ( class from to -- new-class )
M: object replace-question
[ [ = ] keep ] dip swap ? ;
: replace-compound ( class from to -- seq )
[ seq>> ] 2dip '[ _ _ replace-question ] map ;
M: and-class replace-question
replace-compound <and-class> ;
M: or-class replace-question
replace-compound <or-class> ;
: answer ( table question answer -- new-table )
'[ _ _ replace-question ] assoc-map
[ nip ] assoc-filter ;
DEFER: make-condition
: (make-condition) ( table questions question -- condition )
[ 2nip ]
[ swap [ t answer ] dip make-condition ]
[ swap [ f answer ] dip make-condition ] 3tri
<condition> ;
: make-condition ( table questions -- condition )
[ keys ] [ unclip (make-condition) ] if-empty ;
GENERIC: class>questions ( class -- questions )
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
M: or-class class>questions compound-questions ;
M: and-class class>questions compound-questions ;
M: object class>questions 1array ;
: table>condition ( table -- condition )
! This is wrong, since actually an arbitrary and-class or or-class can be used
dup
values <or-class> class>questions t swap remove
make-condition ;
: epsilon-table ( states nfa -- table )
[ H{ } clone tuck ] dip
'[ _ _ t epsilon-loop ] each ;
: find-epsilon-closure ( states nfa -- dfa-state )
epsilon-table table>condition ;
epsilon-table [ swap ] assoc-map table>condition ;
: find-closure ( states transition nfa -- new-states )
[ find-delta ] keep find-epsilon-closure ;

View File

@ -2,13 +2,13 @@
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences regexp.transition-tables fry assocs
accessors locals math sorting arrays sets hashtables regexp.dfa
combinators.short-circuit ;
combinators.short-circuit regexp.classes ;
IN: regexp.minimize
: number-transitions ( transitions numbering -- new-transitions )
dup '[
[ _ at ]
[ [ _ at ] assoc-map ] bi*
[ [ [ _ at ] condition-map ] assoc-map ] bi*
] assoc-map ;
: table>state-numbers ( table -- assoc )
@ -29,6 +29,9 @@ IN: regexp.minimize
dup table>state-numbers
[ number-transitions ] rewrite-transitions ;
: no-conditions? ( state transition-table -- ? )
transitions>> at values [ condition? ] any? not ;
: initially-same? ( s1 s2 transition-table -- ? )
{
[ drop <= ]
@ -39,7 +42,8 @@ IN: regexp.minimize
:: initialize-partitions ( transition-table -- partitions )
! Partition table is sorted-array => ?
H{ } clone :> out
transition-table transitions>> keys :> states
transition-table transitions>> keys
[ transition-table no-conditions? ] filter :> states
states [| s1 |
states [| s2 |
s1 s2 transition-table initially-same?

View File

@ -1,5 +1,5 @@
USING: regexp tools.test kernel sequences regexp.parser regexp.private
regexp.traversal eval strings multiline accessors regexp.matchers ;
eval strings multiline accessors regexp.matchers ;
IN: regexp-tests
\ <regexp> must-infer

View File

@ -3,7 +3,7 @@
USING: accessors combinators kernel math sequences strings sets
assocs prettyprint.backend prettyprint.custom make lexer
namespaces parser arrays fry locals regexp.minimize
regexp.parser regexp.nfa regexp.dfa regexp.traversal
regexp.parser regexp.nfa regexp.dfa
regexp.transition-tables splitting sorting regexp.ast
regexp.negation regexp.matchers regexp.compiler ;
IN: regexp
@ -12,16 +12,16 @@ TUPLE: regexp
{ raw read-only }
{ parse-tree read-only }
{ options read-only }
dfa reverse-dfa dfa-quot ;
dfa reverse-dfa ;
: make-regexp ( string ast -- regexp )
f f <options> f f f regexp boa ; foldable
f f <options> f f regexp boa ; foldable
! Foldable because, when the dfa slot is set,
! it'll be set to the same thing regardless of who sets it
: <optioned-regexp> ( string options -- regexp )
[ dup parse-regexp ] [ string>options ] bi*
f f f regexp boa ;
f f regexp boa ;
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;
@ -34,26 +34,25 @@ C: <reverse-matcher> reverse-matcher
[ parse-tree>> ] [ options>> ] bi <with-options> ;
: compile-regexp ( regexp -- regexp )
dup '[ [ _ get-ast ast>dfa ] unless* ] change-dfa ;
: compile-dfa-quot ( regexp -- regexp )
dup '[ [ _ compile-regexp dfa>> dfa>quotation ] unless* ] change-dfa-quot ;
dup '[ [ _ get-ast ast>dfa dfa>quotation ] unless* ] change-dfa ;
: <reversed-option> ( ast -- reversed )
"r" string>options <with-options> ;
: compile-reverse ( regexp -- regexp )
dup '[ [ _ get-ast <reversed-option> ast>dfa ] unless* ] change-reverse-dfa ;
dup '[
[
_ get-ast <reversed-option>
ast>dfa dfa>quotation
] unless*
] change-reverse-dfa ;
M: regexp match-index-from ( string regexp -- index/f )
dup dfa-quot>>
[ <quot-matcher> ]
[ compile-regexp dfa>> <dfa-matcher> ] ?if
match-index-from ;
compile-regexp dfa-quot>> <quot-matcher> match-index-from ;
M: reverse-matcher match-index-from ( string regexp -- index/f )
[ <reversed> ] [ regexp>> compile-reverse reverse-dfa>> ] bi*
<dfa-traverser> do-match match-index>> ;
<quot-matcher> match-index-from ;
: find-regexp-syntax ( string -- prefix suffix )
{