Unfinished changes for regexp lookaround
parent
4f306518dc
commit
ca19a1b728
|
@ -3,6 +3,8 @@
|
||||||
USING: regexp.classes tools.test arrays kernel ;
|
USING: regexp.classes tools.test arrays kernel ;
|
||||||
IN: regexp.classes.tests
|
IN: regexp.classes.tests
|
||||||
|
|
||||||
|
! Class algebra
|
||||||
|
|
||||||
[ f ] [ { 1 2 } <and-class> ] unit-test
|
[ f ] [ { 1 2 } <and-class> ] unit-test
|
||||||
[ T{ or-class f { 2 1 } } ] [ { 1 2 } <or-class> ] unit-test
|
[ T{ or-class f { 2 1 } } ] [ { 1 2 } <or-class> ] unit-test
|
||||||
[ 3 ] [ { 1 2 } <and-class> 3 2array <or-class> ] unit-test
|
[ 3 ] [ { 1 2 } <and-class> 3 2array <or-class> ] unit-test
|
||||||
|
@ -25,3 +27,28 @@ IN: regexp.classes.tests
|
||||||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
|
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <and-class> ] unit-test
|
||||||
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
|
[ T{ primitive-class { class letter-class } } ] [ letter-class <primitive-class> dup 2array <or-class> ] unit-test
|
||||||
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
|
[ T{ or-class { seq { 2 3 1 } } } ] [ { 1 2 } <or-class> { 2 3 } <or-class> 2array <or-class> ] unit-test
|
||||||
|
|
||||||
|
! Making classes into nested conditionals
|
||||||
|
|
||||||
|
[ V{ 1 2 3 4 } ] [ T{ and-class f { 1 T{ not-class f 2 } T{ or-class f { 3 4 } } 2 } } class>questions ] unit-test
|
||||||
|
[ { 3 } ] [ { { t 3 } } table>condition ] unit-test
|
||||||
|
[ { T{ primitive-class } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>questions ] unit-test
|
||||||
|
[ { { t 1 } { t 2 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } t answer ] unit-test
|
||||||
|
[ { { t 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } T{ primitive-class } f answer ] unit-test
|
||||||
|
[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { t 1 } { T{ primitive-class } 2 } } table>condition ] unit-test
|
||||||
|
|
||||||
|
SYMBOL: foo
|
||||||
|
SYMBOL: bar
|
||||||
|
|
||||||
|
[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 2 3 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { t 1 } { T{ primitive-class f foo } 2 } { T{ primitive-class f bar } 3 } } table>condition ] unit-test
|
||||||
|
|
||||||
|
[ t ] [ foo <primitive-class> dup t replace-question ] unit-test
|
||||||
|
[ f ] [ foo <primitive-class> dup f replace-question ] unit-test
|
||||||
|
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> t replace-question ] unit-test
|
||||||
|
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> f replace-question ] unit-test
|
||||||
|
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> t replace-question ] unit-test
|
||||||
|
[ T{ primitive-class f bar } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> t replace-question ] unit-test
|
||||||
|
[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> f replace-question ] unit-test
|
||||||
|
[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> f replace-question ] unit-test
|
||||||
|
[ t ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> t replace-question ] unit-test
|
||||||
|
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> f replace-question ] unit-test
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors kernel math math.order words combinators locals
|
USING: accessors kernel math math.order words combinators locals
|
||||||
ascii unicode.categories combinators.short-circuit sequences
|
ascii unicode.categories combinators.short-circuit sequences
|
||||||
fry macros arrays ;
|
fry macros arrays assocs sets ;
|
||||||
IN: regexp.classes
|
IN: regexp.classes
|
||||||
|
|
||||||
SINGLETONS: any-char any-char-no-nl
|
SINGLETONS: any-char any-char-no-nl
|
||||||
|
@ -208,3 +208,57 @@ M: primitive-class class-member?
|
||||||
class>> class-member? ;
|
class>> class-member? ;
|
||||||
|
|
||||||
UNION: class primitive-class not-class or-class and-class range ;
|
UNION: class primitive-class not-class or-class and-class range ;
|
||||||
|
|
||||||
|
TUPLE: condition question yes no ;
|
||||||
|
C: <condition> condition
|
||||||
|
|
||||||
|
GENERIC# replace-question 2 ( class from to -- new-class )
|
||||||
|
|
||||||
|
M:: object replace-question ( class from to -- new-class )
|
||||||
|
class from = to class ? ;
|
||||||
|
|
||||||
|
: replace-compound ( class from to -- seq )
|
||||||
|
[ seq>> ] 2dip '[ _ _ replace-question ] map ;
|
||||||
|
|
||||||
|
M: and-class replace-question
|
||||||
|
replace-compound <and-class> ;
|
||||||
|
|
||||||
|
M: or-class replace-question
|
||||||
|
replace-compound <or-class> ;
|
||||||
|
|
||||||
|
M: not-class replace-question
|
||||||
|
class>> replace-question <not-class> ;
|
||||||
|
|
||||||
|
: answer ( table question answer -- new-table )
|
||||||
|
'[ [ _ _ replace-question ] dip ] assoc-map
|
||||||
|
[ drop ] assoc-filter ;
|
||||||
|
|
||||||
|
DEFER: make-condition
|
||||||
|
|
||||||
|
: (make-condition) ( table questions question -- condition )
|
||||||
|
[ 2nip ]
|
||||||
|
[ swap [ t answer ] dip make-condition ]
|
||||||
|
[ swap [ f answer ] dip make-condition ] 3tri
|
||||||
|
2dup = [ 2nip ] [ <condition> ] if ;
|
||||||
|
|
||||||
|
: make-condition ( table questions -- condition )
|
||||||
|
[ values ] [ unclip (make-condition) ] if-empty ;
|
||||||
|
|
||||||
|
GENERIC: class>questions ( class -- questions )
|
||||||
|
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
|
||||||
|
M: or-class class>questions compound-questions ;
|
||||||
|
M: and-class class>questions compound-questions ;
|
||||||
|
M: not-class class>questions class>> class>questions ;
|
||||||
|
M: object class>questions 1array ;
|
||||||
|
|
||||||
|
: table>questions ( table -- questions )
|
||||||
|
keys <and-class> class>questions t swap remove ;
|
||||||
|
|
||||||
|
: table>condition ( table -- condition )
|
||||||
|
>alist dup table>questions make-condition ;
|
||||||
|
|
||||||
|
: condition-map ( condition quot: ( obj -- obj' ) -- new-condition )
|
||||||
|
over condition? [
|
||||||
|
[ [ question>> ] [ yes>> ] [ no>> ] tri ] dip
|
||||||
|
'[ _ condition-map ] bi@ <condition>
|
||||||
|
] [ call ] if ; inline recursive
|
||||||
|
|
|
@ -9,9 +9,17 @@ IN: regexp.compiler
|
||||||
: literals>cases ( literal-transitions -- case-body )
|
: literals>cases ( literal-transitions -- case-body )
|
||||||
[ 1quotation ] assoc-map ;
|
[ 1quotation ] assoc-map ;
|
||||||
|
|
||||||
|
: condition>quot ( condition -- quot )
|
||||||
|
dup condition? [
|
||||||
|
[ question>> ] [ yes>> ] [ no>> ] tri
|
||||||
|
[ condition>quot ] bi@
|
||||||
|
'[ dup _ class-member? _ _ if ]
|
||||||
|
] [
|
||||||
|
[ [ 3drop ] ] [ '[ drop _ execute ] ] if-empty
|
||||||
|
] if ;
|
||||||
|
|
||||||
: non-literals>dispatch ( non-literal-transitions -- quot )
|
: non-literals>dispatch ( non-literal-transitions -- quot )
|
||||||
[ [ '[ dup _ class-member? ] ] [ '[ drop _ execute ] ] bi* ] assoc-map
|
table>condition condition>quot ;
|
||||||
[ 3drop ] suffix '[ _ cond ] ;
|
|
||||||
|
|
||||||
: expand-one-or ( or-class transition -- alist )
|
: expand-one-or ( or-class transition -- alist )
|
||||||
[ seq>> ] dip '[ _ 2array ] map ;
|
[ seq>> ] dip '[ _ 2array ] map ;
|
||||||
|
@ -36,7 +44,7 @@ IN: regexp.compiler
|
||||||
|
|
||||||
: transitions>quot ( transitions final-state? -- quot )
|
: transitions>quot ( transitions final-state? -- quot )
|
||||||
[ split-literals suffix ] dip
|
[ split-literals suffix ] dip
|
||||||
'[ { array-capacity string } declare _ _ step ] ;
|
'[ { array-capacity sequence } declare _ _ step ] ;
|
||||||
|
|
||||||
: word>quot ( word dfa -- quot )
|
: word>quot ( word dfa -- quot )
|
||||||
[ transitions>> at ]
|
[ transitions>> at ]
|
||||||
|
@ -67,11 +75,12 @@ IN: regexp.compiler
|
||||||
: dfa>word ( dfa -- word )
|
: dfa>word ( dfa -- word )
|
||||||
states>words [ states>code ] keep start-state>> ;
|
states>words [ states>code ] keep start-state>> ;
|
||||||
|
|
||||||
: check-string ( string -- string )
|
: check-sequence ( string -- string )
|
||||||
dup string? [ "String required" throw ] unless ;
|
! Make this configurable
|
||||||
|
dup sequence? [ "String required" throw ] unless ;
|
||||||
|
|
||||||
: run-regexp ( start-index string word -- ? )
|
: run-regexp ( start-index string word -- ? )
|
||||||
{ [ f ] [ >fixnum ] [ check-string ] [ execute ] } spread ; inline
|
{ [ f ] [ >fixnum ] [ check-sequence ] [ execute ] } spread ; inline
|
||||||
|
|
||||||
: dfa>quotation ( dfa -- quot )
|
: dfa>quotation ( dfa -- quot )
|
||||||
dfa>word '[ _ run-regexp ] ;
|
dfa>word '[ _ run-regexp ] ;
|
||||||
|
|
|
@ -8,9 +8,6 @@ IN: regexp.dfa
|
||||||
: find-delta ( states transition nfa -- new-states )
|
: find-delta ( states transition nfa -- new-states )
|
||||||
transitions>> '[ _ swap _ at at ] gather sift ;
|
transitions>> '[ _ swap _ at at ] gather sift ;
|
||||||
|
|
||||||
TUPLE: condition question yes no ;
|
|
||||||
C: <condition> condition
|
|
||||||
|
|
||||||
:: epsilon-loop ( state table nfa question -- )
|
:: epsilon-loop ( state table nfa question -- )
|
||||||
state table at :> old-value
|
state table at :> old-value
|
||||||
old-value question 2array <or-class> :> new-question
|
old-value question 2array <or-class> :> new-question
|
||||||
|
@ -27,53 +24,12 @@ C: <condition> condition
|
||||||
] assoc-each
|
] assoc-each
|
||||||
] unless ;
|
] unless ;
|
||||||
|
|
||||||
GENERIC# replace-question 2 ( class from to -- new-class )
|
|
||||||
|
|
||||||
M: object replace-question
|
|
||||||
[ [ = ] keep ] dip swap ? ;
|
|
||||||
|
|
||||||
: replace-compound ( class from to -- seq )
|
|
||||||
[ seq>> ] 2dip '[ _ _ replace-question ] map ;
|
|
||||||
|
|
||||||
M: and-class replace-question
|
|
||||||
replace-compound <and-class> ;
|
|
||||||
|
|
||||||
M: or-class replace-question
|
|
||||||
replace-compound <or-class> ;
|
|
||||||
|
|
||||||
: answer ( table question answer -- new-table )
|
|
||||||
'[ _ _ replace-question ] assoc-map
|
|
||||||
[ nip ] assoc-filter ;
|
|
||||||
|
|
||||||
DEFER: make-condition
|
|
||||||
|
|
||||||
: (make-condition) ( table questions question -- condition )
|
|
||||||
[ 2nip ]
|
|
||||||
[ swap [ t answer ] dip make-condition ]
|
|
||||||
[ swap [ f answer ] dip make-condition ] 3tri
|
|
||||||
<condition> ;
|
|
||||||
|
|
||||||
: make-condition ( table questions -- condition )
|
|
||||||
[ keys ] [ unclip (make-condition) ] if-empty ;
|
|
||||||
|
|
||||||
GENERIC: class>questions ( class -- questions )
|
|
||||||
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
|
|
||||||
M: or-class class>questions compound-questions ;
|
|
||||||
M: and-class class>questions compound-questions ;
|
|
||||||
M: object class>questions 1array ;
|
|
||||||
|
|
||||||
: table>condition ( table -- condition )
|
|
||||||
! This is wrong, since actually an arbitrary and-class or or-class can be used
|
|
||||||
dup
|
|
||||||
values <or-class> class>questions t swap remove
|
|
||||||
make-condition ;
|
|
||||||
|
|
||||||
: epsilon-table ( states nfa -- table )
|
: epsilon-table ( states nfa -- table )
|
||||||
[ H{ } clone tuck ] dip
|
[ H{ } clone tuck ] dip
|
||||||
'[ _ _ t epsilon-loop ] each ;
|
'[ _ _ t epsilon-loop ] each ;
|
||||||
|
|
||||||
: find-epsilon-closure ( states nfa -- dfa-state )
|
: find-epsilon-closure ( states nfa -- dfa-state )
|
||||||
epsilon-table table>condition ;
|
epsilon-table [ swap ] assoc-map table>condition ;
|
||||||
|
|
||||||
: find-closure ( states transition nfa -- new-states )
|
: find-closure ( states transition nfa -- new-states )
|
||||||
[ find-delta ] keep find-epsilon-closure ;
|
[ find-delta ] keep find-epsilon-closure ;
|
||||||
|
|
|
@ -2,13 +2,13 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel sequences regexp.transition-tables fry assocs
|
USING: kernel sequences regexp.transition-tables fry assocs
|
||||||
accessors locals math sorting arrays sets hashtables regexp.dfa
|
accessors locals math sorting arrays sets hashtables regexp.dfa
|
||||||
combinators.short-circuit ;
|
combinators.short-circuit regexp.classes ;
|
||||||
IN: regexp.minimize
|
IN: regexp.minimize
|
||||||
|
|
||||||
: number-transitions ( transitions numbering -- new-transitions )
|
: number-transitions ( transitions numbering -- new-transitions )
|
||||||
dup '[
|
dup '[
|
||||||
[ _ at ]
|
[ _ at ]
|
||||||
[ [ _ at ] assoc-map ] bi*
|
[ [ [ _ at ] condition-map ] assoc-map ] bi*
|
||||||
] assoc-map ;
|
] assoc-map ;
|
||||||
|
|
||||||
: table>state-numbers ( table -- assoc )
|
: table>state-numbers ( table -- assoc )
|
||||||
|
@ -29,6 +29,9 @@ IN: regexp.minimize
|
||||||
dup table>state-numbers
|
dup table>state-numbers
|
||||||
[ number-transitions ] rewrite-transitions ;
|
[ number-transitions ] rewrite-transitions ;
|
||||||
|
|
||||||
|
: no-conditions? ( state transition-table -- ? )
|
||||||
|
transitions>> at values [ condition? ] any? not ;
|
||||||
|
|
||||||
: initially-same? ( s1 s2 transition-table -- ? )
|
: initially-same? ( s1 s2 transition-table -- ? )
|
||||||
{
|
{
|
||||||
[ drop <= ]
|
[ drop <= ]
|
||||||
|
@ -39,7 +42,8 @@ IN: regexp.minimize
|
||||||
:: initialize-partitions ( transition-table -- partitions )
|
:: initialize-partitions ( transition-table -- partitions )
|
||||||
! Partition table is sorted-array => ?
|
! Partition table is sorted-array => ?
|
||||||
H{ } clone :> out
|
H{ } clone :> out
|
||||||
transition-table transitions>> keys :> states
|
transition-table transitions>> keys
|
||||||
|
[ transition-table no-conditions? ] filter :> states
|
||||||
states [| s1 |
|
states [| s1 |
|
||||||
states [| s2 |
|
states [| s2 |
|
||||||
s1 s2 transition-table initially-same?
|
s1 s2 transition-table initially-same?
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
USING: regexp tools.test kernel sequences regexp.parser regexp.private
|
USING: regexp tools.test kernel sequences regexp.parser regexp.private
|
||||||
regexp.traversal eval strings multiline accessors regexp.matchers ;
|
eval strings multiline accessors regexp.matchers ;
|
||||||
IN: regexp-tests
|
IN: regexp-tests
|
||||||
|
|
||||||
\ <regexp> must-infer
|
\ <regexp> must-infer
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
USING: accessors combinators kernel math sequences strings sets
|
USING: accessors combinators kernel math sequences strings sets
|
||||||
assocs prettyprint.backend prettyprint.custom make lexer
|
assocs prettyprint.backend prettyprint.custom make lexer
|
||||||
namespaces parser arrays fry locals regexp.minimize
|
namespaces parser arrays fry locals regexp.minimize
|
||||||
regexp.parser regexp.nfa regexp.dfa regexp.traversal
|
regexp.parser regexp.nfa regexp.dfa
|
||||||
regexp.transition-tables splitting sorting regexp.ast
|
regexp.transition-tables splitting sorting regexp.ast
|
||||||
regexp.negation regexp.matchers regexp.compiler ;
|
regexp.negation regexp.matchers regexp.compiler ;
|
||||||
IN: regexp
|
IN: regexp
|
||||||
|
@ -12,16 +12,16 @@ TUPLE: regexp
|
||||||
{ raw read-only }
|
{ raw read-only }
|
||||||
{ parse-tree read-only }
|
{ parse-tree read-only }
|
||||||
{ options read-only }
|
{ options read-only }
|
||||||
dfa reverse-dfa dfa-quot ;
|
dfa reverse-dfa ;
|
||||||
|
|
||||||
: make-regexp ( string ast -- regexp )
|
: make-regexp ( string ast -- regexp )
|
||||||
f f <options> f f f regexp boa ; foldable
|
f f <options> f f regexp boa ; foldable
|
||||||
! Foldable because, when the dfa slot is set,
|
! Foldable because, when the dfa slot is set,
|
||||||
! it'll be set to the same thing regardless of who sets it
|
! it'll be set to the same thing regardless of who sets it
|
||||||
|
|
||||||
: <optioned-regexp> ( string options -- regexp )
|
: <optioned-regexp> ( string options -- regexp )
|
||||||
[ dup parse-regexp ] [ string>options ] bi*
|
[ dup parse-regexp ] [ string>options ] bi*
|
||||||
f f f regexp boa ;
|
f f regexp boa ;
|
||||||
|
|
||||||
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;
|
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;
|
||||||
|
|
||||||
|
@ -34,26 +34,25 @@ C: <reverse-matcher> reverse-matcher
|
||||||
[ parse-tree>> ] [ options>> ] bi <with-options> ;
|
[ parse-tree>> ] [ options>> ] bi <with-options> ;
|
||||||
|
|
||||||
: compile-regexp ( regexp -- regexp )
|
: compile-regexp ( regexp -- regexp )
|
||||||
dup '[ [ _ get-ast ast>dfa ] unless* ] change-dfa ;
|
dup '[ [ _ get-ast ast>dfa dfa>quotation ] unless* ] change-dfa ;
|
||||||
|
|
||||||
: compile-dfa-quot ( regexp -- regexp )
|
|
||||||
dup '[ [ _ compile-regexp dfa>> dfa>quotation ] unless* ] change-dfa-quot ;
|
|
||||||
|
|
||||||
: <reversed-option> ( ast -- reversed )
|
: <reversed-option> ( ast -- reversed )
|
||||||
"r" string>options <with-options> ;
|
"r" string>options <with-options> ;
|
||||||
|
|
||||||
: compile-reverse ( regexp -- regexp )
|
: compile-reverse ( regexp -- regexp )
|
||||||
dup '[ [ _ get-ast <reversed-option> ast>dfa ] unless* ] change-reverse-dfa ;
|
dup '[
|
||||||
|
[
|
||||||
|
_ get-ast <reversed-option>
|
||||||
|
ast>dfa dfa>quotation
|
||||||
|
] unless*
|
||||||
|
] change-reverse-dfa ;
|
||||||
|
|
||||||
M: regexp match-index-from ( string regexp -- index/f )
|
M: regexp match-index-from ( string regexp -- index/f )
|
||||||
dup dfa-quot>>
|
compile-regexp dfa-quot>> <quot-matcher> match-index-from ;
|
||||||
[ <quot-matcher> ]
|
|
||||||
[ compile-regexp dfa>> <dfa-matcher> ] ?if
|
|
||||||
match-index-from ;
|
|
||||||
|
|
||||||
M: reverse-matcher match-index-from ( string regexp -- index/f )
|
M: reverse-matcher match-index-from ( string regexp -- index/f )
|
||||||
[ <reversed> ] [ regexp>> compile-reverse reverse-dfa>> ] bi*
|
[ <reversed> ] [ regexp>> compile-reverse reverse-dfa>> ] bi*
|
||||||
<dfa-traverser> do-match match-index>> ;
|
<quot-matcher> match-index-from ;
|
||||||
|
|
||||||
: find-regexp-syntax ( string -- prefix suffix )
|
: find-regexp-syntax ( string -- prefix suffix )
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue