More regexp changes
parent
81b68eac48
commit
42ff154ead
|
@ -58,15 +58,8 @@ M: from-to <times>
|
|||
: char-class ( ranges ? -- term )
|
||||
[ <or-class> ] dip [ <not-class> ] when ;
|
||||
|
||||
TUPLE: lookahead term ;
|
||||
TUPLE: lookahead term positive? ;
|
||||
C: <lookahead> lookahead
|
||||
|
||||
TUPLE: lookbehind term ;
|
||||
TUPLE: lookbehind term positive? ;
|
||||
C: <lookbehind> lookbehind
|
||||
|
||||
TUPLE: possessive-star term ;
|
||||
C: <possessive-star> possessive-star
|
||||
|
||||
: <possessive-plus> ( term -- term' )
|
||||
dup <possessive-star> 2array <concatenation> ;
|
||||
|
||||
|
|
|
@ -239,6 +239,9 @@ M: not-class replace-question
|
|||
'[ _ _ replace-question ] assoc-map
|
||||
[ nip ] assoc-filter ;
|
||||
|
||||
: answers ( table questions answer -- new-table )
|
||||
'[ _ answer ] each ;
|
||||
|
||||
DEFER: make-condition
|
||||
|
||||
: (make-condition) ( table questions question -- condition )
|
||||
|
|
|
@ -36,21 +36,17 @@ M: $ question>quot
|
|||
M: ^ question>quot
|
||||
drop [ { [ drop zero? ] [ [ 1- ] dip ?nth "\r\n" member? ] } 2|| ] ;
|
||||
|
||||
! Maybe the condition>quot things can be combined, given a suitable method
|
||||
! for question>quot on classes, but maybe that'd make stack shuffling annoying
|
||||
|
||||
: execution-quot ( next-state -- quot )
|
||||
: (execution-quot) ( next-state -- quot )
|
||||
! The conditions here are for lookaround and anchors, etc
|
||||
dup condition? [
|
||||
[ question>> question>quot ] [ yes>> ] [ no>> ] tri
|
||||
[ execution-quot ] bi@
|
||||
[ (execution-quot) ] bi@
|
||||
'[ 2dup @ _ _ if ]
|
||||
] [
|
||||
! There shouldn't be a condition like this!
|
||||
dup sequence?
|
||||
[ [ [ 2drop ] ] [ first '[ _ execute ] ] if-empty ]
|
||||
[ '[ _ execute ] ] if
|
||||
] if ;
|
||||
] [ '[ _ execute ] ] if ;
|
||||
|
||||
: execution-quot ( next-state -- quot )
|
||||
dup sequence? [ first ] when
|
||||
(execution-quot) ;
|
||||
|
||||
TUPLE: box contents ;
|
||||
C: <box> box
|
||||
|
@ -66,8 +62,9 @@ C: <box> box
|
|||
[ [ 3drop ] ] [ execution-quot '[ drop @ ] ] if-empty
|
||||
] if ;
|
||||
|
||||
: non-literals>dispatch ( non-literal-transitions -- quot )
|
||||
: non-literals>dispatch ( literals non-literals -- quot )
|
||||
[ swap ] assoc-map ! we want state => predicate, and get the opposite as input
|
||||
swap keys f answers
|
||||
table>condition [ <box> ] condition-map condition>quot ;
|
||||
|
||||
: literals>cases ( literal-transitions -- case-body )
|
||||
|
@ -84,7 +81,7 @@ C: <box> box
|
|||
|
||||
: split-literals ( transitions -- case default )
|
||||
>alist expand-or [ first integer? ] partition
|
||||
[ literals>cases ] [ non-literals>dispatch ] bi* ;
|
||||
[ [ literals>cases ] keep ] dip non-literals>dispatch ;
|
||||
|
||||
:: step ( last-match index str quot final? direction -- last-index/f )
|
||||
final? index last-match ?
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: tools.test regexp.minimize assocs regexp
|
||||
accessors regexp.transition-tables regexp.parser regexp.negation ;
|
||||
accessors regexp.transition-tables regexp.parser
|
||||
regexp.classes regexp.negation ;
|
||||
IN: regexp.minimize.tests
|
||||
|
||||
[ t ] [ 1 2 H{ { { 1 2 } t } } same-partition? ] unit-test
|
||||
|
@ -52,3 +53,6 @@ IN: regexp.minimize.tests
|
|||
] unit-test
|
||||
|
||||
[ [ ] [ ] while-changes ] must-infer
|
||||
|
||||
[ H{ { T{ or-class f { 1 2 } } 3 } { 4 5 } } ]
|
||||
[ H{ { 1 3 } { 2 3 } { 4 5 } } combine-state-transitions ] unit-test
|
||||
|
|
|
@ -11,8 +11,8 @@ IN: regexp.minimize
|
|||
: number-states ( table -- newtable )
|
||||
dup table>state-numbers transitions-at ;
|
||||
|
||||
: no-conditions? ( state transition-table -- ? )
|
||||
transitions>> at values [ condition? ] any? not ;
|
||||
: has-conditions? ( state transitions -- ? )
|
||||
at values [ condition? ] any? ;
|
||||
|
||||
: initially-same? ( s1 s2 transition-table -- ? )
|
||||
{
|
||||
|
@ -25,7 +25,8 @@ IN: regexp.minimize
|
|||
! Partition table is sorted-array => ?
|
||||
H{ } clone :> out
|
||||
transition-table transitions>> keys
|
||||
[ transition-table no-conditions? ] filter :> states
|
||||
[ transition-table transitions>> has-conditions? ] partition :> states
|
||||
[ dup 2array out conjoin ] each
|
||||
states [| s1 |
|
||||
states [| s2 |
|
||||
s1 s2 transition-table initially-same?
|
||||
|
@ -68,16 +69,27 @@ IN: regexp.minimize
|
|||
'[ _ partition-more ] [ assoc-size ] while-changes
|
||||
partition>classes ;
|
||||
|
||||
: canonical-state? ( state state-classes -- ? )
|
||||
dupd at = ;
|
||||
: canonical-state? ( state transitions state-classes -- ? )
|
||||
'[ dup _ at = ] swap '[ _ has-conditions? ] bi or ;
|
||||
|
||||
: delete-duplicates ( transitions state-classes -- new-transitions )
|
||||
'[ drop _ canonical-state? ] assoc-filter ;
|
||||
dupd '[ drop _ _ canonical-state? ] assoc-filter ;
|
||||
|
||||
: combine-states ( table -- smaller-table )
|
||||
dup state-classes
|
||||
[ transitions-at ] keep
|
||||
'[ _ delete-duplicates ] change-transitions ;
|
||||
|
||||
: combine-state-transitions ( hash -- hash )
|
||||
H{ } clone tuck '[
|
||||
_ [ 2array <or-class> ] change-at
|
||||
] assoc-each [ swap ] assoc-map ;
|
||||
|
||||
: combine-transitions ( table -- table )
|
||||
[ [ combine-state-transitions ] assoc-map ] change-transitions ;
|
||||
|
||||
: minimize ( table -- minimal-table )
|
||||
clone number-states combine-states ;
|
||||
clone
|
||||
number-states
|
||||
combine-states
|
||||
combine-transitions ;
|
||||
|
|
|
@ -43,11 +43,11 @@ CONSTANT: fail-state -1
|
|||
|
||||
: unify-final-state ( transition-table -- transition-table )
|
||||
dup [ final-states>> keys ] keep
|
||||
'[ -2 epsilon _ add-transition ] each
|
||||
'[ -2 epsilon _ set-transition ] each
|
||||
H{ { -2 -2 } } >>final-states ;
|
||||
|
||||
: adjoin-dfa ( transition-table -- start end )
|
||||
box-transitions unify-final-state renumber-states
|
||||
unify-final-state renumber-states box-transitions
|
||||
[ start-state>> ]
|
||||
[ final-states>> keys first ]
|
||||
[ nfa-table get [ transitions>> ] bi@ swap update ] tri ;
|
||||
|
|
|
@ -138,10 +138,10 @@ Parenthized = "?:" Alternation:a => [[ a ]]
|
|||
=> [[ a on off parse-options <with-options> ]]
|
||||
| "?#" [^)]* => [[ f ]]
|
||||
| "?~" Alternation:a => [[ a <negation> ]]
|
||||
| "?=" Alternation:a => [[ a <lookahead> <tagged-epsilon> ]]
|
||||
| "?!" Alternation:a => [[ a <negation> <lookahead> <tagged-epsilon> ]]
|
||||
| "?<=" Alternation:a => [[ a <lookbehind> <tagged-epsilon> ]]
|
||||
| "?<!" Alternation:a => [[ a <negation> <lookbehind> <tagged-epsilon> ]]
|
||||
| "?=" Alternation:a => [[ a t <lookahead> <tagged-epsilon> ]]
|
||||
| "?!" Alternation:a => [[ a f <lookahead> <tagged-epsilon> ]]
|
||||
| "?<=" Alternation:a => [[ a t <lookbehind> <tagged-epsilon> ]]
|
||||
| "?<!" Alternation:a => [[ a f <lookbehind> <tagged-epsilon> ]]
|
||||
| Alternation
|
||||
|
||||
Element = "(" Parenthized:p ")" => [[ p ]]
|
||||
|
@ -158,8 +158,6 @@ Times = "," Number:n "}" => [[ 0 n <from-to> ]]
|
|||
| Number:n "," Number:m "}" => [[ n m <from-to> ]]
|
||||
|
||||
Repeated = Element:e "{" Times:t => [[ e t <times> ]]
|
||||
| Element:e "*+" => [[ e <possessive-star> ]]
|
||||
| Element:e "++" => [[ e <possessive-plus> ]]
|
||||
| Element:e "?" => [[ e <maybe> ]]
|
||||
| Element:e "*" => [[ e <star> ]]
|
||||
| Element:e "+" => [[ e <plus> ]]
|
||||
|
|
|
@ -24,8 +24,8 @@ IN: regexp-tests
|
|||
[ t ] [ "b" "b|" <regexp> matches? ] unit-test
|
||||
[ t ] [ "" "b|" <regexp> matches? ] unit-test
|
||||
[ t ] [ "" "b|" <regexp> matches? ] unit-test
|
||||
[ f ] [ "" "|" <regexp> matches? ] unit-test
|
||||
[ f ] [ "" "|||||||" <regexp> matches? ] unit-test
|
||||
[ t ] [ "" "|" <regexp> matches? ] unit-test
|
||||
[ t ] [ "" "|||||||" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
|
||||
|
@ -182,7 +182,7 @@ IN: regexp-tests
|
|||
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ t ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
|
@ -300,8 +300,10 @@ IN: regexp-tests
|
|||
|
||||
[ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
||||
|
||||
[ f ] [ "ab" "a(?!b)" <regexp> match-head ] unit-test
|
||||
[ "" ] [ "ab" "a(?!b)" <regexp> match-head >string ] unit-test
|
||||
[ "a" ] [ "ac" "a(?!b)" <regexp> match-head >string ] unit-test
|
||||
[ t ] [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
|
||||
[ t ] [ "foobar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
|
||||
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> match-head >string ] unit-test
|
||||
|
@ -396,9 +398,9 @@ IN: regexp-tests
|
|||
[ f ] [ "\ra" R/ \Aa\Z/m matches? ] unit-test
|
||||
|
||||
[ 1 ] [ "a" R/ \Aa\Z/m count-matches ] unit-test
|
||||
[ 1 ] [ "\na" R/ \Aaa\Z/m count-matches ] unit-test
|
||||
[ 1 ] [ "\r\na" R/ \Aa\Z/m count-matches ] unit-test
|
||||
[ 1 ] [ "\ra" R/ \Aa\Z/m count-matches ] unit-test
|
||||
[ 0 ] [ "\na" R/ \Aaa\Z/m count-matches ] unit-test
|
||||
[ 0 ] [ "\r\na" R/ \Aa\Z/m count-matches ] unit-test
|
||||
[ 0 ] [ "\ra" R/ \Aa\Z/m count-matches ] unit-test
|
||||
|
||||
[ t ] [ "a" R/ ^a/m matches? ] unit-test
|
||||
[ f ] [ "\na" R/ ^a/m matches? ] unit-test
|
||||
|
|
|
@ -40,13 +40,18 @@ C: <reverse-matcher> reverse-matcher
|
|||
: <reversed-option> ( ast -- reversed )
|
||||
"r" string>options <with-options> ;
|
||||
|
||||
: maybe-negated ( lookaround quot -- regexp-quot )
|
||||
'[ term>> @ ] [ positive?>> [ ] [ not ] ? ] bi compose ;
|
||||
|
||||
M: lookahead question>quot ! Returns ( index string -- ? )
|
||||
term>> ast>dfa dfa>shortest-quotation ;
|
||||
[ ast>dfa dfa>shortest-quotation ] maybe-negated ;
|
||||
|
||||
M: lookbehind question>quot ! Returns ( index string -- ? )
|
||||
term>> <reversed-option>
|
||||
[
|
||||
<reversed-option>
|
||||
ast>dfa dfa>reverse-shortest-quotation
|
||||
[ [ 1- ] dip ] prepose ;
|
||||
[ [ 1- ] dip ] prepose
|
||||
] maybe-negated ;
|
||||
|
||||
: compile-reverse ( regexp -- regexp )
|
||||
dup '[
|
||||
|
|
Loading…
Reference in New Issue