More regexp changes
parent
81b68eac48
commit
42ff154ead
|
@ -58,15 +58,8 @@ M: from-to <times>
|
||||||
: char-class ( ranges ? -- term )
|
: char-class ( ranges ? -- term )
|
||||||
[ <or-class> ] dip [ <not-class> ] when ;
|
[ <or-class> ] dip [ <not-class> ] when ;
|
||||||
|
|
||||||
TUPLE: lookahead term ;
|
TUPLE: lookahead term positive? ;
|
||||||
C: <lookahead> lookahead
|
C: <lookahead> lookahead
|
||||||
|
|
||||||
TUPLE: lookbehind term ;
|
TUPLE: lookbehind term positive? ;
|
||||||
C: <lookbehind> lookbehind
|
C: <lookbehind> lookbehind
|
||||||
|
|
||||||
TUPLE: possessive-star term ;
|
|
||||||
C: <possessive-star> possessive-star
|
|
||||||
|
|
||||||
: <possessive-plus> ( term -- term' )
|
|
||||||
dup <possessive-star> 2array <concatenation> ;
|
|
||||||
|
|
||||||
|
|
|
@ -239,6 +239,9 @@ M: not-class replace-question
|
||||||
'[ _ _ replace-question ] assoc-map
|
'[ _ _ replace-question ] assoc-map
|
||||||
[ nip ] assoc-filter ;
|
[ nip ] assoc-filter ;
|
||||||
|
|
||||||
|
: answers ( table questions answer -- new-table )
|
||||||
|
'[ _ answer ] each ;
|
||||||
|
|
||||||
DEFER: make-condition
|
DEFER: make-condition
|
||||||
|
|
||||||
: (make-condition) ( table questions question -- condition )
|
: (make-condition) ( table questions question -- condition )
|
||||||
|
|
|
@ -36,21 +36,17 @@ M: $ question>quot
|
||||||
M: ^ question>quot
|
M: ^ question>quot
|
||||||
drop [ { [ drop zero? ] [ [ 1- ] dip ?nth "\r\n" member? ] } 2|| ] ;
|
drop [ { [ drop zero? ] [ [ 1- ] dip ?nth "\r\n" member? ] } 2|| ] ;
|
||||||
|
|
||||||
! Maybe the condition>quot things can be combined, given a suitable method
|
: (execution-quot) ( next-state -- quot )
|
||||||
! for question>quot on classes, but maybe that'd make stack shuffling annoying
|
|
||||||
|
|
||||||
: execution-quot ( next-state -- quot )
|
|
||||||
! The conditions here are for lookaround and anchors, etc
|
! The conditions here are for lookaround and anchors, etc
|
||||||
dup condition? [
|
dup condition? [
|
||||||
[ question>> question>quot ] [ yes>> ] [ no>> ] tri
|
[ question>> question>quot ] [ yes>> ] [ no>> ] tri
|
||||||
[ execution-quot ] bi@
|
[ (execution-quot) ] bi@
|
||||||
'[ 2dup @ _ _ if ]
|
'[ 2dup @ _ _ if ]
|
||||||
] [
|
] [ '[ _ execute ] ] if ;
|
||||||
! There shouldn't be a condition like this!
|
|
||||||
dup sequence?
|
: execution-quot ( next-state -- quot )
|
||||||
[ [ [ 2drop ] ] [ first '[ _ execute ] ] if-empty ]
|
dup sequence? [ first ] when
|
||||||
[ '[ _ execute ] ] if
|
(execution-quot) ;
|
||||||
] if ;
|
|
||||||
|
|
||||||
TUPLE: box contents ;
|
TUPLE: box contents ;
|
||||||
C: <box> box
|
C: <box> box
|
||||||
|
@ -66,8 +62,9 @@ C: <box> box
|
||||||
[ [ 3drop ] ] [ execution-quot '[ drop @ ] ] if-empty
|
[ [ 3drop ] ] [ execution-quot '[ drop @ ] ] if-empty
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
: non-literals>dispatch ( non-literal-transitions -- quot )
|
: non-literals>dispatch ( literals non-literals -- quot )
|
||||||
[ swap ] assoc-map ! we want state => predicate, and get the opposite as input
|
[ swap ] assoc-map ! we want state => predicate, and get the opposite as input
|
||||||
|
swap keys f answers
|
||||||
table>condition [ <box> ] condition-map condition>quot ;
|
table>condition [ <box> ] condition-map condition>quot ;
|
||||||
|
|
||||||
: literals>cases ( literal-transitions -- case-body )
|
: literals>cases ( literal-transitions -- case-body )
|
||||||
|
@ -84,7 +81,7 @@ C: <box> box
|
||||||
|
|
||||||
: split-literals ( transitions -- case default )
|
: split-literals ( transitions -- case default )
|
||||||
>alist expand-or [ first integer? ] partition
|
>alist expand-or [ first integer? ] partition
|
||||||
[ literals>cases ] [ non-literals>dispatch ] bi* ;
|
[ [ literals>cases ] keep ] dip non-literals>dispatch ;
|
||||||
|
|
||||||
:: step ( last-match index str quot final? direction -- last-index/f )
|
:: step ( last-match index str quot final? direction -- last-index/f )
|
||||||
final? index last-match ?
|
final? index last-match ?
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
! Copyright (C) 2009 Daniel Ehrenberg
|
! Copyright (C) 2009 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: tools.test regexp.minimize assocs regexp
|
USING: tools.test regexp.minimize assocs regexp
|
||||||
accessors regexp.transition-tables regexp.parser regexp.negation ;
|
accessors regexp.transition-tables regexp.parser
|
||||||
|
regexp.classes regexp.negation ;
|
||||||
IN: regexp.minimize.tests
|
IN: regexp.minimize.tests
|
||||||
|
|
||||||
[ t ] [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
|
[ t ] [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
|
||||||
|
@ -52,3 +53,6 @@ IN: regexp.minimize.tests
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
[ [ ] [ ] while-changes ] must-infer
|
[ [ ] [ ] while-changes ] must-infer
|
||||||
|
|
||||||
|
[ H{ { T{ or-class f { 1 2 } } 3 } { 4 5 } } ]
|
||||||
|
[ H{ { 1 3 } { 2 3 } { 4 5 } } combine-state-transitions ] unit-test
|
||||||
|
|
|
@ -11,8 +11,8 @@ IN: regexp.minimize
|
||||||
: number-states ( table -- newtable )
|
: number-states ( table -- newtable )
|
||||||
dup table>state-numbers transitions-at ;
|
dup table>state-numbers transitions-at ;
|
||||||
|
|
||||||
: no-conditions? ( state transition-table -- ? )
|
: has-conditions? ( state transitions -- ? )
|
||||||
transitions>> at values [ condition? ] any? not ;
|
at values [ condition? ] any? ;
|
||||||
|
|
||||||
: initially-same? ( s1 s2 transition-table -- ? )
|
: initially-same? ( s1 s2 transition-table -- ? )
|
||||||
{
|
{
|
||||||
|
@ -25,7 +25,8 @@ IN: regexp.minimize
|
||||||
! Partition table is sorted-array => ?
|
! Partition table is sorted-array => ?
|
||||||
H{ } clone :> out
|
H{ } clone :> out
|
||||||
transition-table transitions>> keys
|
transition-table transitions>> keys
|
||||||
[ transition-table no-conditions? ] filter :> states
|
[ transition-table transitions>> has-conditions? ] partition :> states
|
||||||
|
[ dup 2array out conjoin ] each
|
||||||
states [| s1 |
|
states [| s1 |
|
||||||
states [| s2 |
|
states [| s2 |
|
||||||
s1 s2 transition-table initially-same?
|
s1 s2 transition-table initially-same?
|
||||||
|
@ -68,16 +69,27 @@ IN: regexp.minimize
|
||||||
'[ _ partition-more ] [ assoc-size ] while-changes
|
'[ _ partition-more ] [ assoc-size ] while-changes
|
||||||
partition>classes ;
|
partition>classes ;
|
||||||
|
|
||||||
: canonical-state? ( state state-classes -- ? )
|
: canonical-state? ( state transitions state-classes -- ? )
|
||||||
dupd at = ;
|
'[ dup _ at = ] swap '[ _ has-conditions? ] bi or ;
|
||||||
|
|
||||||
: delete-duplicates ( transitions state-classes -- new-transitions )
|
: delete-duplicates ( transitions state-classes -- new-transitions )
|
||||||
'[ drop _ canonical-state? ] assoc-filter ;
|
dupd '[ drop _ _ canonical-state? ] assoc-filter ;
|
||||||
|
|
||||||
: combine-states ( table -- smaller-table )
|
: combine-states ( table -- smaller-table )
|
||||||
dup state-classes
|
dup state-classes
|
||||||
[ transitions-at ] keep
|
[ transitions-at ] keep
|
||||||
'[ _ delete-duplicates ] change-transitions ;
|
'[ _ delete-duplicates ] change-transitions ;
|
||||||
|
|
||||||
|
: combine-state-transitions ( hash -- hash )
|
||||||
|
H{ } clone tuck '[
|
||||||
|
_ [ 2array <or-class> ] change-at
|
||||||
|
] assoc-each [ swap ] assoc-map ;
|
||||||
|
|
||||||
|
: combine-transitions ( table -- table )
|
||||||
|
[ [ combine-state-transitions ] assoc-map ] change-transitions ;
|
||||||
|
|
||||||
: minimize ( table -- minimal-table )
|
: minimize ( table -- minimal-table )
|
||||||
clone number-states combine-states ;
|
clone
|
||||||
|
number-states
|
||||||
|
combine-states
|
||||||
|
combine-transitions ;
|
||||||
|
|
|
@ -43,11 +43,11 @@ CONSTANT: fail-state -1
|
||||||
|
|
||||||
: unify-final-state ( transition-table -- transition-table )
|
: unify-final-state ( transition-table -- transition-table )
|
||||||
dup [ final-states>> keys ] keep
|
dup [ final-states>> keys ] keep
|
||||||
'[ -2 epsilon _ add-transition ] each
|
'[ -2 epsilon _ set-transition ] each
|
||||||
H{ { -2 -2 } } >>final-states ;
|
H{ { -2 -2 } } >>final-states ;
|
||||||
|
|
||||||
: adjoin-dfa ( transition-table -- start end )
|
: adjoin-dfa ( transition-table -- start end )
|
||||||
box-transitions unify-final-state renumber-states
|
unify-final-state renumber-states box-transitions
|
||||||
[ start-state>> ]
|
[ start-state>> ]
|
||||||
[ final-states>> keys first ]
|
[ final-states>> keys first ]
|
||||||
[ nfa-table get [ transitions>> ] bi@ swap update ] tri ;
|
[ nfa-table get [ transitions>> ] bi@ swap update ] tri ;
|
||||||
|
|
|
@ -138,10 +138,10 @@ Parenthized = "?:" Alternation:a => [[ a ]]
|
||||||
=> [[ a on off parse-options <with-options> ]]
|
=> [[ a on off parse-options <with-options> ]]
|
||||||
| "?#" [^)]* => [[ f ]]
|
| "?#" [^)]* => [[ f ]]
|
||||||
| "?~" Alternation:a => [[ a <negation> ]]
|
| "?~" Alternation:a => [[ a <negation> ]]
|
||||||
| "?=" Alternation:a => [[ a <lookahead> <tagged-epsilon> ]]
|
| "?=" Alternation:a => [[ a t <lookahead> <tagged-epsilon> ]]
|
||||||
| "?!" Alternation:a => [[ a <negation> <lookahead> <tagged-epsilon> ]]
|
| "?!" Alternation:a => [[ a f <lookahead> <tagged-epsilon> ]]
|
||||||
| "?<=" Alternation:a => [[ a <lookbehind> <tagged-epsilon> ]]
|
| "?<=" Alternation:a => [[ a t <lookbehind> <tagged-epsilon> ]]
|
||||||
| "?<!" Alternation:a => [[ a <negation> <lookbehind> <tagged-epsilon> ]]
|
| "?<!" Alternation:a => [[ a f <lookbehind> <tagged-epsilon> ]]
|
||||||
| Alternation
|
| Alternation
|
||||||
|
|
||||||
Element = "(" Parenthized:p ")" => [[ p ]]
|
Element = "(" Parenthized:p ")" => [[ p ]]
|
||||||
|
@ -158,8 +158,6 @@ Times = "," Number:n "}" => [[ 0 n <from-to> ]]
|
||||||
| Number:n "," Number:m "}" => [[ n m <from-to> ]]
|
| Number:n "," Number:m "}" => [[ n m <from-to> ]]
|
||||||
|
|
||||||
Repeated = Element:e "{" Times:t => [[ e t <times> ]]
|
Repeated = Element:e "{" Times:t => [[ e t <times> ]]
|
||||||
| Element:e "*+" => [[ e <possessive-star> ]]
|
|
||||||
| Element:e "++" => [[ e <possessive-plus> ]]
|
|
||||||
| Element:e "?" => [[ e <maybe> ]]
|
| Element:e "?" => [[ e <maybe> ]]
|
||||||
| Element:e "*" => [[ e <star> ]]
|
| Element:e "*" => [[ e <star> ]]
|
||||||
| Element:e "+" => [[ e <plus> ]]
|
| Element:e "+" => [[ e <plus> ]]
|
||||||
|
|
|
@ -24,8 +24,8 @@ IN: regexp-tests
|
||||||
[ t ] [ "b" "b|" <regexp> matches? ] unit-test
|
[ t ] [ "b" "b|" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "" "b|" <regexp> matches? ] unit-test
|
[ t ] [ "" "b|" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "" "b|" <regexp> matches? ] unit-test
|
[ t ] [ "" "b|" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "" "|" <regexp> matches? ] unit-test
|
[ t ] [ "" "|" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "" "|||||||" <regexp> matches? ] unit-test
|
[ t ] [ "" "|||||||" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
|
[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
|
[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
|
||||||
|
@ -182,7 +182,7 @@ IN: regexp-tests
|
||||||
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
[ f ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
|
[ t ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
|
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||||
|
@ -300,8 +300,10 @@ IN: regexp-tests
|
||||||
|
|
||||||
[ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
[ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
||||||
|
|
||||||
[ f ] [ "ab" "a(?!b)" <regexp> match-head ] unit-test
|
[ "" ] [ "ab" "a(?!b)" <regexp> match-head >string ] unit-test
|
||||||
[ "a" ] [ "ac" "a(?!b)" <regexp> match-head >string ] unit-test
|
[ "a" ] [ "ac" "a(?!b)" <regexp> match-head >string ] unit-test
|
||||||
|
[ t ] [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
|
||||||
|
[ t ] [ "foobar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||||
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> match-head >string ] unit-test
|
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> match-head >string ] unit-test
|
||||||
|
@ -396,9 +398,9 @@ IN: regexp-tests
|
||||||
[ f ] [ "\ra" R/ \Aa\Z/m matches? ] unit-test
|
[ f ] [ "\ra" R/ \Aa\Z/m matches? ] unit-test
|
||||||
|
|
||||||
[ 1 ] [ "a" R/ \Aa\Z/m count-matches ] unit-test
|
[ 1 ] [ "a" R/ \Aa\Z/m count-matches ] unit-test
|
||||||
[ 1 ] [ "\na" R/ \Aaa\Z/m count-matches ] unit-test
|
[ 0 ] [ "\na" R/ \Aaa\Z/m count-matches ] unit-test
|
||||||
[ 1 ] [ "\r\na" R/ \Aa\Z/m count-matches ] unit-test
|
[ 0 ] [ "\r\na" R/ \Aa\Z/m count-matches ] unit-test
|
||||||
[ 1 ] [ "\ra" R/ \Aa\Z/m count-matches ] unit-test
|
[ 0 ] [ "\ra" R/ \Aa\Z/m count-matches ] unit-test
|
||||||
|
|
||||||
[ t ] [ "a" R/ ^a/m matches? ] unit-test
|
[ t ] [ "a" R/ ^a/m matches? ] unit-test
|
||||||
[ f ] [ "\na" R/ ^a/m matches? ] unit-test
|
[ f ] [ "\na" R/ ^a/m matches? ] unit-test
|
||||||
|
|
|
@ -40,13 +40,18 @@ C: <reverse-matcher> reverse-matcher
|
||||||
: <reversed-option> ( ast -- reversed )
|
: <reversed-option> ( ast -- reversed )
|
||||||
"r" string>options <with-options> ;
|
"r" string>options <with-options> ;
|
||||||
|
|
||||||
|
: maybe-negated ( lookaround quot -- regexp-quot )
|
||||||
|
'[ term>> @ ] [ positive?>> [ ] [ not ] ? ] bi compose ;
|
||||||
|
|
||||||
M: lookahead question>quot ! Returns ( index string -- ? )
|
M: lookahead question>quot ! Returns ( index string -- ? )
|
||||||
term>> ast>dfa dfa>shortest-quotation ;
|
[ ast>dfa dfa>shortest-quotation ] maybe-negated ;
|
||||||
|
|
||||||
M: lookbehind question>quot ! Returns ( index string -- ? )
|
M: lookbehind question>quot ! Returns ( index string -- ? )
|
||||||
term>> <reversed-option>
|
[
|
||||||
ast>dfa dfa>reverse-shortest-quotation
|
<reversed-option>
|
||||||
[ [ 1- ] dip ] prepose ;
|
ast>dfa dfa>reverse-shortest-quotation
|
||||||
|
[ [ 1- ] dip ] prepose
|
||||||
|
] maybe-negated ;
|
||||||
|
|
||||||
: compile-reverse ( regexp -- regexp )
|
: compile-reverse ( regexp -- regexp )
|
||||||
dup '[
|
dup '[
|
||||||
|
|
Loading…
Reference in New Issue