Merge branch 'master' of git://factorcode.org/git/factor
commit
7d9389ad9e
|
@ -1,6 +1,6 @@
|
||||||
! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg.
|
! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: sequences kernel regexp.combinators regexp.matchers strings unicode.case
|
USING: sequences kernel regexp.combinators strings unicode.case
|
||||||
peg.ebnf regexp arrays ;
|
peg.ebnf regexp arrays ;
|
||||||
IN: globs
|
IN: globs
|
||||||
|
|
||||||
|
|
|
@ -30,15 +30,15 @@ IN: regexp.classes.tests
|
||||||
[ T{ or-class { seq { 3 2 } } } ] [ { 2 3 } <or-class> 1 <not-class> 2array <and-class> ] unit-test
|
[ T{ or-class { seq { 3 2 } } } ] [ { 2 3 } <or-class> 1 <not-class> 2array <and-class> ] unit-test
|
||||||
[ f ] [ t <not-class> ] unit-test
|
[ f ] [ t <not-class> ] unit-test
|
||||||
[ t ] [ f <not-class> ] unit-test
|
[ t ] [ f <not-class> ] unit-test
|
||||||
[ f ] [ 1 <not-class> 1 t replace-question ] unit-test
|
[ f ] [ 1 <not-class> 1 t answer ] unit-test
|
||||||
|
|
||||||
! Making classes into nested conditionals
|
! Making classes into nested conditionals
|
||||||
|
|
||||||
[ V{ 1 2 3 4 } ] [ T{ and-class f { 1 T{ not-class f 2 } T{ or-class f { 3 4 } } 2 } } class>questions ] unit-test
|
[ V{ 1 2 3 4 } ] [ T{ and-class f { 1 T{ not-class f 2 } T{ or-class f { 3 4 } } 2 } } class>questions ] unit-test
|
||||||
[ { 3 } ] [ { { 3 t } } table>condition ] unit-test
|
[ { 3 } ] [ { { 3 t } } table>condition ] unit-test
|
||||||
[ { T{ primitive-class } } ] [ { { 1 t } { 2 T{ primitive-class } } } table>questions ] unit-test
|
[ { T{ primitive-class } } ] [ { { 1 t } { 2 T{ primitive-class } } } table>questions ] unit-test
|
||||||
[ { { 1 t } { 2 t } } ] [ { { 1 t } { 2 T{ primitive-class } } } T{ primitive-class } t answer ] unit-test
|
[ { { 1 t } { 2 t } } ] [ { { 1 t } { 2 T{ primitive-class } } } T{ primitive-class } t assoc-answer ] unit-test
|
||||||
[ { { 1 t } } ] [ { { 1 t } { 2 T{ primitive-class } } } T{ primitive-class } f answer ] unit-test
|
[ { { 1 t } } ] [ { { 1 t } { 2 T{ primitive-class } } } T{ primitive-class } f assoc-answer ] unit-test
|
||||||
[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { 1 t } { 2 T{ primitive-class } } } table>condition ] unit-test
|
[ T{ condition f T{ primitive-class } { 1 2 } { 1 } } ] [ { { 1 t } { 2 T{ primitive-class } } } table>condition ] unit-test
|
||||||
|
|
||||||
SYMBOL: foo
|
SYMBOL: foo
|
||||||
|
@ -46,13 +46,13 @@ SYMBOL: bar
|
||||||
|
|
||||||
[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 3 2 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { 1 t } { 3 T{ primitive-class f bar } } { 2 T{ primitive-class f foo } } } table>condition ] unit-test
|
[ T{ condition f T{ primitive-class f bar } T{ condition f T{ primitive-class f foo } { 1 3 2 } { 1 3 } } T{ condition f T{ primitive-class f foo } { 1 2 } { 1 } } } ] [ { { 1 t } { 3 T{ primitive-class f bar } } { 2 T{ primitive-class f foo } } } table>condition ] unit-test
|
||||||
|
|
||||||
[ t ] [ foo <primitive-class> dup t replace-question ] unit-test
|
[ t ] [ foo <primitive-class> dup t answer ] unit-test
|
||||||
[ f ] [ foo <primitive-class> dup f replace-question ] unit-test
|
[ f ] [ foo <primitive-class> dup f answer ] unit-test
|
||||||
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> t replace-question ] unit-test
|
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> t answer ] unit-test
|
||||||
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> f replace-question ] unit-test
|
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> f answer ] unit-test
|
||||||
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> t replace-question ] unit-test
|
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> t answer ] unit-test
|
||||||
[ T{ primitive-class f bar } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> t replace-question ] unit-test
|
[ T{ primitive-class f bar } ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> t answer ] unit-test
|
||||||
[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> f replace-question ] unit-test
|
[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> foo <primitive-class> f answer ] unit-test
|
||||||
[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> f replace-question ] unit-test
|
[ f ] [ foo <primitive-class> bar <primitive-class> 2array <and-class> bar <primitive-class> f answer ] unit-test
|
||||||
[ t ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> t replace-question ] unit-test
|
[ t ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> t answer ] unit-test
|
||||||
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> f replace-question ] unit-test
|
[ T{ primitive-class f foo } ] [ foo <primitive-class> bar <primitive-class> 2array <or-class> bar <primitive-class> f answer ] unit-test
|
||||||
|
|
|
@ -163,20 +163,32 @@ M: integer combine-or
|
||||||
: try-combine ( elt1 elt2 quot -- combined/f ? )
|
: try-combine ( elt1 elt2 quot -- combined/f ? )
|
||||||
3dup call [ [ 3drop ] dip t ] [ drop swapd call ] if ; inline
|
3dup call [ [ 3drop ] dip t ] [ drop swapd call ] if ; inline
|
||||||
|
|
||||||
|
DEFER: answer
|
||||||
|
|
||||||
|
:: try-cancel ( elt1 elt2 empty -- combined/f ? )
|
||||||
|
[ elt1 elt2 empty answer dup elt1 = not ] try-combine ;
|
||||||
|
|
||||||
:: prefix-combining ( seq elt quot: ( elt1 elt2 -- combined/f ? ) -- newseq )
|
:: prefix-combining ( seq elt quot: ( elt1 elt2 -- combined/f ? ) -- newseq )
|
||||||
f :> combined!
|
f :> combined!
|
||||||
seq [ elt quot try-combine swap combined! ] find drop
|
seq [ elt quot call swap combined! ] find drop
|
||||||
[ seq remove-nth combined prefix ]
|
[ seq remove-nth combined prefix ]
|
||||||
[ seq elt prefix ] if* ; inline
|
[ seq elt prefix ] if* ; inline
|
||||||
|
|
||||||
|
: combine-by ( seq quot -- new-seq )
|
||||||
|
{ } swap '[ _ prefix-combining ] reduce ; inline
|
||||||
|
|
||||||
|
:: seq>instance ( seq empty class -- instance )
|
||||||
|
seq length {
|
||||||
|
{ 0 [ empty ] }
|
||||||
|
{ 1 [ seq first ] }
|
||||||
|
[ drop class new seq >>seq ]
|
||||||
|
} case ; inline
|
||||||
|
|
||||||
:: combine ( seq quot: ( elt1 elt2 -- combined/f ? ) empty class -- newseq )
|
:: combine ( seq quot: ( elt1 elt2 -- combined/f ? ) empty class -- newseq )
|
||||||
seq class flatten
|
seq class flatten
|
||||||
{ } [ quot prefix-combining ] reduce
|
[ quot try-combine ] combine-by
|
||||||
dup length {
|
! [ empty try-cancel ] combine-by ! This makes the algorithm O(n^4)
|
||||||
{ 0 [ drop empty ] }
|
empty class seq>instance ; inline
|
||||||
{ 1 [ first ] }
|
|
||||||
[ drop class new swap >>seq ]
|
|
||||||
} case ; inline
|
|
||||||
|
|
||||||
: <and-class> ( seq -- class )
|
: <and-class> ( seq -- class )
|
||||||
[ combine-and ] t and-class combine ;
|
[ combine-and ] t and-class combine ;
|
||||||
|
@ -218,36 +230,36 @@ UNION: class primitive-class not-class or-class and-class range ;
|
||||||
TUPLE: condition question yes no ;
|
TUPLE: condition question yes no ;
|
||||||
C: <condition> condition
|
C: <condition> condition
|
||||||
|
|
||||||
GENERIC# replace-question 2 ( class from to -- new-class )
|
GENERIC# answer 2 ( class from to -- new-class )
|
||||||
|
|
||||||
M:: object replace-question ( class from to -- new-class )
|
M:: object answer ( class from to -- new-class )
|
||||||
class from = to class ? ;
|
class from = to class ? ;
|
||||||
|
|
||||||
: replace-compound ( class from to -- seq )
|
: replace-compound ( class from to -- seq )
|
||||||
[ seq>> ] 2dip '[ _ _ replace-question ] map ;
|
[ seq>> ] 2dip '[ _ _ answer ] map ;
|
||||||
|
|
||||||
M: and-class replace-question
|
M: and-class answer
|
||||||
replace-compound <and-class> ;
|
replace-compound <and-class> ;
|
||||||
|
|
||||||
M: or-class replace-question
|
M: or-class answer
|
||||||
replace-compound <or-class> ;
|
replace-compound <or-class> ;
|
||||||
|
|
||||||
M: not-class replace-question
|
M: not-class answer
|
||||||
[ class>> ] 2dip replace-question <not-class> ;
|
[ class>> ] 2dip answer <not-class> ;
|
||||||
|
|
||||||
: answer ( table question answer -- new-table )
|
: assoc-answer ( table question answer -- new-table )
|
||||||
'[ _ _ replace-question ] assoc-map
|
'[ _ _ answer ] assoc-map
|
||||||
[ nip ] assoc-filter ;
|
[ nip ] assoc-filter ;
|
||||||
|
|
||||||
: answers ( table questions answer -- new-table )
|
: assoc-answers ( table questions answer -- new-table )
|
||||||
'[ _ answer ] each ;
|
'[ _ assoc-answer ] each ;
|
||||||
|
|
||||||
DEFER: make-condition
|
DEFER: make-condition
|
||||||
|
|
||||||
: (make-condition) ( table questions question -- condition )
|
: (make-condition) ( table questions question -- condition )
|
||||||
[ 2nip ]
|
[ 2nip ]
|
||||||
[ swap [ t answer ] dip make-condition ]
|
[ swap [ t assoc-answer ] dip make-condition ]
|
||||||
[ swap [ f answer ] dip make-condition ] 3tri
|
[ swap [ f assoc-answer ] dip make-condition ] 3tri
|
||||||
2dup = [ 2nip ] [ <condition> ] if ;
|
2dup = [ 2nip ] [ <condition> ] if ;
|
||||||
|
|
||||||
: make-condition ( table questions -- condition )
|
: make-condition ( table questions -- condition )
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
! Copyright (C) 2009 Daniel Ehrenberg
|
! Copyright (C) 2009 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: regexp.combinators tools.test regexp kernel sequences regexp.matchers ;
|
USING: regexp.combinators tools.test regexp kernel sequences ;
|
||||||
IN: regexp.combinators.tests
|
IN: regexp.combinators.tests
|
||||||
|
|
||||||
: strings ( -- regexp )
|
: strings ( -- regexp )
|
||||||
|
@ -16,7 +16,7 @@ USE: multiline
|
||||||
{ R' .*a' R' b.*' } <and> ;
|
{ R' .*a' R' b.*' } <and> ;
|
||||||
|
|
||||||
[ t ] [ "bljhasflsda" conj matches? ] unit-test
|
[ t ] [ "bljhasflsda" conj matches? ] unit-test
|
||||||
[ f ] [ "bsdfdfs" conj matches? ] unit-test ! why does this fail?
|
[ f ] [ "bsdfdfs" conj matches? ] unit-test
|
||||||
[ f ] [ "fsfa" conj matches? ] unit-test
|
[ f ] [ "fsfa" conj matches? ] unit-test
|
||||||
|
|
||||||
[ f ] [ "bljhasflsda" conj <not> matches? ] unit-test
|
[ f ] [ "bljhasflsda" conj <not> matches? ] unit-test
|
||||||
|
|
|
@ -1,19 +1,19 @@
|
||||||
! Copyright (C) 2009 Daniel Ehrenberg.
|
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: regexp.classes kernel sequences regexp.negation
|
USING: regexp.classes kernel sequences regexp.negation
|
||||||
quotations regexp.minimize assocs fry math locals combinators
|
quotations assocs fry math locals combinators
|
||||||
accessors words compiler.units kernel.private strings
|
accessors words compiler.units kernel.private strings
|
||||||
sequences.private arrays regexp.matchers call namespaces
|
sequences.private arrays call namespaces
|
||||||
regexp.transition-tables combinators.short-circuit ;
|
regexp.transition-tables combinators.short-circuit ;
|
||||||
IN: regexp.compiler
|
IN: regexp.compiler
|
||||||
|
|
||||||
GENERIC: question>quot ( question -- quot )
|
GENERIC: question>quot ( question -- quot )
|
||||||
|
|
||||||
<PRIVATE
|
|
||||||
|
|
||||||
SYMBOL: shortest?
|
SYMBOL: shortest?
|
||||||
SYMBOL: backwards?
|
SYMBOL: backwards?
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
M: t question>quot drop [ 2drop t ] ;
|
M: t question>quot drop [ 2drop t ] ;
|
||||||
|
|
||||||
M: beginning-of-input question>quot
|
M: beginning-of-input question>quot
|
||||||
|
@ -64,7 +64,7 @@ C: <box> box
|
||||||
|
|
||||||
: non-literals>dispatch ( literals non-literals -- quot )
|
: non-literals>dispatch ( literals non-literals -- quot )
|
||||||
[ swap ] assoc-map ! we want state => predicate, and get the opposite as input
|
[ swap ] assoc-map ! we want state => predicate, and get the opposite as input
|
||||||
swap keys f answers
|
swap keys f assoc-answers
|
||||||
table>condition [ <box> ] condition-map condition>quot ;
|
table>condition [ <box> ] condition-map condition>quot ;
|
||||||
|
|
||||||
: literals>cases ( literal-transitions -- case-body )
|
: literals>cases ( literal-transitions -- case-body )
|
||||||
|
@ -106,13 +106,15 @@ C: <box> box
|
||||||
transitions>quot ;
|
transitions>quot ;
|
||||||
|
|
||||||
: states>code ( words dfa -- )
|
: states>code ( words dfa -- )
|
||||||
'[
|
[ ! with-compilation-unit doesn't compile, so we need call( -- )
|
||||||
[
|
[
|
||||||
dup _ word>quot
|
'[
|
||||||
(( last-match index string -- ? ))
|
dup _ word>quot
|
||||||
define-declared
|
(( last-match index string -- ? ))
|
||||||
] each
|
define-declared
|
||||||
] with-compilation-unit ;
|
] each
|
||||||
|
] with-compilation-unit
|
||||||
|
] call( words dfa -- ) ;
|
||||||
|
|
||||||
: states>words ( dfa -- words dfa )
|
: states>words ( dfa -- words dfa )
|
||||||
dup transitions>> keys [ gensym ] H{ } map>assoc
|
dup transitions>> keys [ gensym ] H{ } map>assoc
|
||||||
|
@ -120,34 +122,23 @@ C: <box> box
|
||||||
[ values ]
|
[ values ]
|
||||||
bi swap ;
|
bi swap ;
|
||||||
|
|
||||||
: dfa>word ( dfa -- word )
|
: dfa>main-word ( dfa -- word )
|
||||||
states>words [ states>code ] keep start-state>> ;
|
states>words [ states>code ] keep start-state>> ;
|
||||||
|
|
||||||
: check-string ( string -- string )
|
|
||||||
! Make this configurable
|
|
||||||
dup string? [ "String required" throw ] unless ;
|
|
||||||
|
|
||||||
: setup-regexp ( start-index string -- f start-index string )
|
|
||||||
[ f ] [ >fixnum ] [ check-string ] tri* ; inline
|
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
! The quotation returned is ( start-index string -- i/f )
|
: simple-define-temp ( quot effect -- word )
|
||||||
|
[ [ define-temp ] with-compilation-unit ] call( quot effect -- word ) ;
|
||||||
|
|
||||||
: dfa>quotation ( dfa -- quot )
|
: dfa>word ( dfa -- quot )
|
||||||
dfa>word execution-quot '[ setup-regexp @ ] ;
|
dfa>main-word execution-quot '[ drop [ f ] 2dip @ ]
|
||||||
|
(( start-index string regexp -- i/f )) simple-define-temp ;
|
||||||
|
|
||||||
: dfa>shortest-quotation ( dfa -- quot )
|
: dfa>shortest-word ( dfa -- word )
|
||||||
t shortest? [ dfa>quotation ] with-variable ;
|
t shortest? [ dfa>word ] with-variable ;
|
||||||
|
|
||||||
: dfa>reverse-quotation ( dfa -- quot )
|
: dfa>reverse-word ( dfa -- word )
|
||||||
t backwards? [ dfa>quotation ] with-variable ;
|
t backwards? [ dfa>word ] with-variable ;
|
||||||
|
|
||||||
: dfa>reverse-shortest-quotation ( dfa -- quot )
|
: dfa>reverse-shortest-word ( dfa -- word )
|
||||||
t backwards? [ dfa>shortest-quotation ] with-variable ;
|
t backwards? [ dfa>shortest-word ] with-variable ;
|
||||||
|
|
||||||
TUPLE: quot-matcher quot ;
|
|
||||||
C: <quot-matcher> quot-matcher
|
|
||||||
|
|
||||||
M: quot-matcher match-index-from
|
|
||||||
quot>> call( index string -- i/f ) ;
|
|
||||||
|
|
|
@ -6,9 +6,6 @@ regexp.ast regexp.transition-tables regexp.minimize
|
||||||
regexp.dfa namespaces ;
|
regexp.dfa namespaces ;
|
||||||
IN: regexp.negation
|
IN: regexp.negation
|
||||||
|
|
||||||
: ast>dfa ( parse-tree -- minimal-dfa )
|
|
||||||
construct-nfa disambiguate construct-dfa minimize ;
|
|
||||||
|
|
||||||
CONSTANT: fail-state -1
|
CONSTANT: fail-state -1
|
||||||
|
|
||||||
: add-default-transition ( state's-transitions -- new-state's-transitions )
|
: add-default-transition ( state's-transitions -- new-state's-transitions )
|
||||||
|
@ -49,5 +46,8 @@ CONSTANT: fail-state -1
|
||||||
[ final-states>> keys first ]
|
[ final-states>> keys first ]
|
||||||
[ nfa-table get [ transitions>> ] bi@ swap update ] tri ;
|
[ nfa-table get [ transitions>> ] bi@ swap update ] tri ;
|
||||||
|
|
||||||
|
: ast>dfa ( parse-tree -- minimal-dfa )
|
||||||
|
construct-nfa disambiguate construct-dfa minimize ;
|
||||||
|
|
||||||
M: negation nfa-node ( node -- start end )
|
M: negation nfa-node ( node -- start end )
|
||||||
term>> ast>dfa negate-table adjoin-dfa ;
|
term>> ast>dfa negate-table adjoin-dfa ;
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel strings help.markup help.syntax regexp.matchers math ;
|
USING: kernel strings help.markup help.syntax math ;
|
||||||
IN: regexp
|
IN: regexp
|
||||||
|
|
||||||
ABOUT: "regexp"
|
ABOUT: "regexp"
|
||||||
|
@ -39,13 +39,14 @@ ARTICLE: { "regexp" "theory" } "The theory of regular expressions"
|
||||||
"The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex also use this algorithm, but in the Factor regular expression engine, all other features of regexps are still present." ;
|
"The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex also use this algorithm, but in the Factor regular expression engine, all other features of regexps are still present." ;
|
||||||
|
|
||||||
ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions"
|
ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions"
|
||||||
{ $subsection all-matches }
|
|
||||||
{ $subsection matches? }
|
{ $subsection matches? }
|
||||||
|
{ $subsection re-contains? }
|
||||||
|
{ $subsection first-match }
|
||||||
|
{ $subsection all-matches }
|
||||||
{ $subsection re-split1 }
|
{ $subsection re-split1 }
|
||||||
{ $subsection re-split }
|
{ $subsection re-split }
|
||||||
{ $subsection re-replace }
|
{ $subsection re-replace }
|
||||||
{ $subsection count-matches }
|
{ $subsection count-matches } ;
|
||||||
{ $subsection re-replace } ;
|
|
||||||
|
|
||||||
HELP: <regexp>
|
HELP: <regexp>
|
||||||
{ $values { "string" string } { "regexp" regexp } }
|
{ $values { "string" string } { "regexp" regexp } }
|
||||||
|
@ -63,25 +64,33 @@ HELP: regexp
|
||||||
{ $class-description "The class of regular expressions. To construct these, see " { $link { "regexp" "construction" } } "." } ;
|
{ $class-description "The class of regular expressions. To construct these, see " { $link { "regexp" "construction" } } "." } ;
|
||||||
|
|
||||||
HELP: matches?
|
HELP: matches?
|
||||||
{ $values { "string" string } { "matcher" regexp } { "?" "a boolean" } }
|
{ $values { "string" string } { "regexp" regexp } { "?" "a boolean" } }
|
||||||
{ $description "Tests if the string as a whole matches the given regular expression." } ;
|
{ $description "Tests if the string as a whole matches the given regular expression." } ;
|
||||||
|
|
||||||
HELP: re-split1
|
HELP: re-split1
|
||||||
{ $values { "string" string } { "matcher" regexp } { "before" string } { "after/f" string } }
|
{ $values { "string" string } { "regexp" regexp } { "before" string } { "after/f" string } }
|
||||||
{ $description "Searches the string for a substring which matches the pattern. If found, the input string is split on the leftmost and longest occurence of the match, and the two halves are given as output. If no match is found, then the input string and " { $link f } " are output." } ;
|
{ $description "Searches the string for a substring which matches the pattern. If found, the input string is split on the leftmost and longest occurence of the match, and the two halves are given as output. If no match is found, then the input string and " { $link f } " are output." } ;
|
||||||
|
|
||||||
HELP: all-matches
|
HELP: all-matches
|
||||||
{ $values { "string" string } { "matcher" regexp } { "seq" "a sequence of slices of the input" } }
|
{ $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
|
||||||
{ $description "Finds a sequence of disjoint substrings which each match the pattern. It chooses this by finding the leftmost longest match, and then the leftmost longest match which starts after the end of the previous match, and so on." } ;
|
{ $description "Finds a sequence of disjoint substrings which each match the pattern. It chooses this by finding the leftmost longest match, and then the leftmost longest match which starts after the end of the previous match, and so on." } ;
|
||||||
|
|
||||||
HELP: count-matches
|
HELP: count-matches
|
||||||
{ $values { "string" string } { "matcher" regexp } { "n" integer } }
|
{ $values { "string" string } { "regexp" regexp } { "n" integer } }
|
||||||
{ $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matches } "." } ;
|
{ $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matches } "." } ;
|
||||||
|
|
||||||
HELP: re-split
|
HELP: re-split
|
||||||
{ $values { "string" string } { "matcher" regexp } { "seq" "a sequence of slices of the input" } }
|
{ $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
|
||||||
{ $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matches } "." } ;
|
{ $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matches } "." } ;
|
||||||
|
|
||||||
HELP: re-replace
|
HELP: re-replace
|
||||||
{ $values { "string" string } { "matcher" regexp } { "replacement" string } { "result" string } }
|
{ $values { "string" string } { "regexp" regexp } { "replacement" string } { "result" string } }
|
||||||
{ $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matches } "." } ;
|
{ $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matches } "." } ;
|
||||||
|
|
||||||
|
HELP: first-match
|
||||||
|
{ $values { "string" string } { "regexp" regexp } { "slice/f" "the match, if one exists" } }
|
||||||
|
{ $description "Finds the first match of the regular expression in the string, and returns it as a slice. If there is no match, then " { $link f } " is returned." } ;
|
||||||
|
|
||||||
|
HELP: re-contains?
|
||||||
|
{ $values { "string" string } { "regexp" regexp } { "?" "a boolean" } }
|
||||||
|
{ $description "Determines whether the string has a substring which matches the regular expression given." } ;
|
||||||
|
|
|
@ -1,13 +1,12 @@
|
||||||
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg
|
! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: regexp tools.test kernel sequences regexp.parser regexp.private
|
USING: regexp tools.test kernel sequences regexp.parser regexp.private
|
||||||
eval strings multiline accessors regexp.matchers ;
|
eval strings multiline accessors ;
|
||||||
IN: regexp-tests
|
IN: regexp-tests
|
||||||
|
|
||||||
\ <regexp> must-infer
|
\ <regexp> must-infer
|
||||||
! the following don't compile because [ ] with-compilation-unit doesn't compile
|
\ compile-regexp must-infer
|
||||||
! \ compile-regexp must-infer
|
\ matches? must-infer
|
||||||
! \ matches? must-infer
|
|
||||||
|
|
||||||
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
|
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "" "a*" <regexp> matches? ] unit-test
|
[ t ] [ "" "a*" <regexp> matches? ] unit-test
|
||||||
|
@ -212,8 +211,8 @@ IN: regexp-tests
|
||||||
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
|
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
|
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
[ 3 ] [ "aaacb" "a*" <regexp> match-index-head ] unit-test
|
[ "aaa" ] [ "aaacb" "a*" <regexp> first-match >string ] unit-test
|
||||||
[ 2 ] [ "aaacb" "aa?" <regexp> match-index-head ] unit-test
|
[ "aa" ] [ "aaacb" "aa?" <regexp> first-match >string ] unit-test
|
||||||
|
|
||||||
[ t ] [ "aaa" R/ AAA/i matches? ] unit-test
|
[ t ] [ "aaa" R/ AAA/i matches? ] unit-test
|
||||||
[ f ] [ "aax" R/ AAA/i matches? ] unit-test
|
[ f ] [ "aax" R/ AAA/i matches? ] unit-test
|
||||||
|
@ -240,11 +239,11 @@ IN: regexp-tests
|
||||||
[ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
|
[ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "A" R/ \p{Lower}/i matches? ] unit-test
|
[ t ] [ "A" R/ \p{Lower}/i matches? ] unit-test
|
||||||
|
|
||||||
[ t ] [ "abc" reverse R/ abc/r matches? ] unit-test
|
[ t ] [ "abc" R/ abc/r matches? ] unit-test
|
||||||
[ t ] [ "abc" reverse R/ a[bB][cC]/r matches? ] unit-test
|
[ t ] [ "abc" R/ a[bB][cC]/r matches? ] unit-test
|
||||||
|
|
||||||
[ t ] [ 3 "xabc" R/ abc/ <reverse-matcher> match-index-from >boolean ] unit-test
|
[ t ] [ 3 "xabc" R/ abc/r match-index-from >boolean ] unit-test
|
||||||
[ t ] [ 3 "xabc" R/ a[bB][cC]/ <reverse-matcher> match-index-from >boolean ] unit-test
|
[ t ] [ 3 "xabc" R/ a[bB][cC]/r match-index-from >boolean ] unit-test
|
||||||
|
|
||||||
[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||||
|
@ -269,13 +268,13 @@ IN: regexp-tests
|
||||||
|
|
||||||
[ ] [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval ] unit-test
|
[ ] [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval ] unit-test
|
||||||
|
|
||||||
[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> match-head >string ] unit-test
|
[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
|
||||||
[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> match-head >string ] unit-test
|
[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
|
||||||
|
|
||||||
[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> match-head >string ] unit-test
|
[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
|
||||||
[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> match-head >string ] unit-test
|
[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
|
||||||
|
|
||||||
[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> match-head >string ] unit-test
|
[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
|
||||||
|
|
||||||
[ { "1" "2" "3" "4" } ]
|
[ { "1" "2" "3" "4" } ]
|
||||||
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
|
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ re-split [ >string ] map ] unit-test
|
||||||
|
@ -301,18 +300,18 @@ IN: regexp-tests
|
||||||
|
|
||||||
[ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
[ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
||||||
|
|
||||||
[ "" ] [ "ab" "a(?!b)" <regexp> match-head >string ] unit-test
|
[ "" ] [ "ab" "a(?!b)" <regexp> first-match >string ] unit-test
|
||||||
[ "a" ] [ "ac" "a(?!b)" <regexp> match-head >string ] unit-test
|
[ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
|
||||||
[ t ] [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
|
[ t ] [ "fxxbar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "foobar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
|
[ t ] [ "foobar" ".{3}(?!foo)bar" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||||
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> match-head >string ] unit-test
|
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
|
||||||
[ "a" ] [ "ba" "(?<=b)(?<=b)a" <regexp> match-head >string ] unit-test
|
[ "a" ] [ "ba" "(?<=b)(?<=b)a" <regexp> first-match >string ] unit-test
|
||||||
[ "a" ] [ "cab" "(?<=c)a(?=b)" <regexp> match-head >string ] unit-test
|
[ "a" ] [ "cab" "(?<=c)a(?=b)" <regexp> first-match >string ] unit-test
|
||||||
|
|
||||||
[ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-index-head ] unit-test
|
[ 3 ] [ "foobar" "foo(?=bar)" <regexp> first-match length ] unit-test
|
||||||
[ f ] [ "foobxr" "foo(?=bar)" <regexp> match-index-head ] unit-test
|
[ f ] [ "foobxr" "foo(?=bar)" <regexp> first-match ] unit-test
|
||||||
|
|
||||||
! Bug in parsing word
|
! Bug in parsing word
|
||||||
[ t ] [ "a" R' a' matches? ] unit-test
|
[ t ] [ "a" R' a' matches? ] unit-test
|
||||||
|
@ -342,9 +341,19 @@ IN: regexp-tests
|
||||||
|
|
||||||
[ t ] [ "aaaa" R/ .*a./ matches? ] unit-test
|
[ t ] [ "aaaa" R/ .*a./ matches? ] unit-test
|
||||||
|
|
||||||
|
[ f ] [ "ab" R/ (?~ac|\p{Lower}b)/ matches? ] unit-test
|
||||||
|
[ f ] [ "ab" R/ (?~ac|[a-z]b)/ matches? ] unit-test
|
||||||
|
[ f ] [ "ac" R/ (?~ac|\p{Lower}b)/ matches? ] unit-test
|
||||||
|
[ f ] [ "ac" R/ (?~ac|[a-z]b)/ matches? ] unit-test
|
||||||
|
[ f ] [ "ac" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
||||||
|
[ f ] [ "ab" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
||||||
|
[ f ] [ "πb" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
||||||
|
[ t ] [ "πc" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
||||||
|
[ t ] [ "Ab" R/ (?~[a-zA-Z]c|\p{Lower}b)/ matches? ] unit-test
|
||||||
|
|
||||||
! DFA is compiled when needed, or when literal
|
! DFA is compiled when needed, or when literal
|
||||||
[ f ] [ "foo" <regexp> dfa>> >boolean ] unit-test
|
[ regexp-initial-word ] [ "foo" <regexp> dfa>> ] unit-test
|
||||||
[ t ] [ R/ foo/ dfa>> >boolean ] unit-test
|
[ f ] [ R/ foo/ dfa>> \ regexp-initial-word = ] unit-test
|
||||||
|
|
||||||
[ t ] [ "a" R/ ^a/ matches? ] unit-test
|
[ t ] [ "a" R/ ^a/ matches? ] unit-test
|
||||||
[ f ] [ "\na" R/ ^a/ matches? ] unit-test
|
[ f ] [ "\na" R/ ^a/ matches? ] unit-test
|
||||||
|
@ -415,8 +424,12 @@ IN: regexp-tests
|
||||||
[ 1 ] [ "a\r" R/ a$/m count-matches ] unit-test
|
[ 1 ] [ "a\r" R/ a$/m count-matches ] unit-test
|
||||||
[ 1 ] [ "a\r\n" R/ a$/m count-matches ] unit-test
|
[ 1 ] [ "a\r\n" R/ a$/m count-matches ] unit-test
|
||||||
|
|
||||||
[ f ] [ "foobxr" "foo\\z" <regexp> match-index-head ] unit-test
|
[ f ] [ "foobxr" "foo\\z" <regexp> first-match ] unit-test
|
||||||
[ 3 ] [ "foo" "foo\\z" <regexp> match-index-head ] unit-test
|
[ 3 ] [ "foo" "foo\\z" <regexp> first-match length ] unit-test
|
||||||
|
|
||||||
|
[ t ] [ "a foo b" R/ foo/ re-contains? ] unit-test
|
||||||
|
[ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test
|
||||||
|
[ t ] [ "foo" R/ foo/ re-contains? ] unit-test
|
||||||
|
|
||||||
! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
|
! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
|
||||||
! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
|
! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
|
||||||
|
|
|
@ -2,71 +2,166 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors combinators kernel math sequences strings sets
|
USING: accessors combinators kernel math sequences strings sets
|
||||||
assocs prettyprint.backend prettyprint.custom make lexer
|
assocs prettyprint.backend prettyprint.custom make lexer
|
||||||
namespaces parser arrays fry locals regexp.minimize
|
namespaces parser arrays fry locals regexp.parser splitting
|
||||||
regexp.parser regexp.nfa regexp.dfa regexp.classes
|
sorting regexp.ast regexp.negation regexp.compiler words
|
||||||
regexp.transition-tables splitting sorting regexp.ast
|
call call.private math.ranges ;
|
||||||
regexp.negation regexp.matchers regexp.compiler ;
|
|
||||||
IN: regexp
|
IN: regexp
|
||||||
|
|
||||||
TUPLE: regexp
|
TUPLE: regexp
|
||||||
{ raw read-only }
|
{ raw read-only }
|
||||||
{ parse-tree read-only }
|
{ parse-tree read-only }
|
||||||
{ options read-only }
|
{ options read-only }
|
||||||
dfa reverse-dfa ;
|
dfa next-match ;
|
||||||
|
|
||||||
: make-regexp ( string ast -- regexp )
|
TUPLE: reverse-regexp < regexp ;
|
||||||
f f <options> f f regexp boa ; foldable
|
|
||||||
! Foldable because, when the dfa slot is set,
|
|
||||||
! it'll be set to the same thing regardless of who sets it
|
|
||||||
|
|
||||||
: <optioned-regexp> ( string options -- regexp )
|
<PRIVATE
|
||||||
[ dup parse-regexp ] [ string>options ] bi*
|
|
||||||
f f regexp boa ;
|
|
||||||
|
|
||||||
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;
|
: maybe-negated ( lookaround quot -- regexp-quot )
|
||||||
|
'[ term>> @ ] [ positive?>> [ ] [ not ] ? ] bi compose ; inline
|
||||||
|
|
||||||
TUPLE: reverse-matcher regexp ;
|
M: lookahead question>quot ! Returns ( index string -- ? )
|
||||||
C: <reverse-matcher> reverse-matcher
|
[ ast>dfa dfa>shortest-word '[ f _ execute ] ] maybe-negated ;
|
||||||
! Reverse matchers won't work properly with most combinators, for now
|
|
||||||
|
: <reversed-option> ( ast -- reversed )
|
||||||
|
"r" string>options <with-options> ;
|
||||||
|
|
||||||
|
M: lookbehind question>quot ! Returns ( index string -- ? )
|
||||||
|
[
|
||||||
|
<reversed-option>
|
||||||
|
ast>dfa dfa>reverse-shortest-word
|
||||||
|
'[ [ 1- ] dip f _ execute ]
|
||||||
|
] maybe-negated ;
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
|
: check-string ( string -- string )
|
||||||
|
! Make this configurable
|
||||||
|
dup string? [ "String required" throw ] unless ;
|
||||||
|
|
||||||
|
: match-index-from ( i string regexp -- index/f )
|
||||||
|
! This word is unsafe. It assumes that i is a fixnum
|
||||||
|
! and that string is a string.
|
||||||
|
dup dfa>> execute( index string regexp -- i/f ) ;
|
||||||
|
|
||||||
|
GENERIC: end/start ( string regexp -- end start )
|
||||||
|
M: regexp end/start drop length 0 ;
|
||||||
|
M: reverse-regexp end/start drop length 1- -1 swap ;
|
||||||
|
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
|
: matches? ( string regexp -- ? )
|
||||||
|
[ end/start ] 2keep
|
||||||
|
[ check-string ] dip
|
||||||
|
match-index-from
|
||||||
|
[ swap = ] [ drop f ] if* ;
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
|
: match-slice ( i string quot -- slice/f )
|
||||||
|
[ 2dup ] dip call
|
||||||
|
[ swap <slice> ] [ 2drop f ] if* ; inline
|
||||||
|
|
||||||
|
: match-from ( i string quot -- slice/f )
|
||||||
|
[ [ length [a,b) ] keep ] dip
|
||||||
|
'[ _ _ match-slice ] map-find drop ; inline
|
||||||
|
|
||||||
|
: next-match ( i string quot -- i match/f )
|
||||||
|
match-from [ dup [ to>> ] when ] keep ; inline
|
||||||
|
|
||||||
|
: do-next-match ( i string regexp -- i match/f )
|
||||||
|
dup next-match>> execute( i string regexp -- i match/f ) ;
|
||||||
|
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
|
: all-matches ( string regexp -- seq )
|
||||||
|
[ check-string ] dip
|
||||||
|
[ 0 [ dup ] ] 2dip '[ _ _ do-next-match ] produce
|
||||||
|
nip but-last ;
|
||||||
|
|
||||||
|
: count-matches ( string regexp -- n )
|
||||||
|
all-matches length ;
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
|
:: split-slices ( string slices -- new-slices )
|
||||||
|
slices [ to>> ] map 0 prefix
|
||||||
|
slices [ from>> ] map string length suffix
|
||||||
|
[ string <slice> ] 2map ;
|
||||||
|
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
|
: first-match ( string regexp -- slice/f )
|
||||||
|
[ 0 ] [ check-string ] [ ] tri*
|
||||||
|
do-next-match nip ;
|
||||||
|
|
||||||
|
: re-contains? ( string regexp -- ? )
|
||||||
|
first-match >boolean ;
|
||||||
|
|
||||||
|
: re-split1 ( string regexp -- before after/f )
|
||||||
|
dupd first-match [ 1array split-slices first2 ] [ f ] if* ;
|
||||||
|
|
||||||
|
: re-split ( string regexp -- seq )
|
||||||
|
dupd all-matches split-slices ;
|
||||||
|
|
||||||
|
: re-replace ( string regexp replacement -- result )
|
||||||
|
[ re-split ] dip join ;
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
: get-ast ( regexp -- ast )
|
: get-ast ( regexp -- ast )
|
||||||
[ parse-tree>> ] [ options>> ] bi <with-options> ;
|
[ parse-tree>> ] [ options>> ] bi <with-options> ;
|
||||||
|
|
||||||
: compile-regexp ( regexp -- regexp )
|
GENERIC: compile-regexp ( regex -- regexp )
|
||||||
dup '[ [ _ get-ast ast>dfa dfa>quotation ] unless* ] change-dfa ;
|
|
||||||
|
|
||||||
: <reversed-option> ( ast -- reversed )
|
: regexp-initial-word ( i string regexp -- i/f )
|
||||||
"r" string>options <with-options> ;
|
compile-regexp match-index-from ;
|
||||||
|
|
||||||
: maybe-negated ( lookaround quot -- regexp-quot )
|
: do-compile-regexp ( regexp -- regexp )
|
||||||
'[ term>> @ ] [ positive?>> [ ] [ not ] ? ] bi compose ; inline
|
|
||||||
|
|
||||||
M: lookahead question>quot ! Returns ( index string -- ? )
|
|
||||||
[ ast>dfa dfa>shortest-quotation ] maybe-negated ;
|
|
||||||
|
|
||||||
M: lookbehind question>quot ! Returns ( index string -- ? )
|
|
||||||
[
|
|
||||||
<reversed-option>
|
|
||||||
ast>dfa dfa>reverse-shortest-quotation
|
|
||||||
[ [ 1- ] dip ] prepose
|
|
||||||
] maybe-negated ;
|
|
||||||
|
|
||||||
: compile-reverse ( regexp -- regexp )
|
|
||||||
dup '[
|
dup '[
|
||||||
[
|
dup \ regexp-initial-word =
|
||||||
_ get-ast <reversed-option>
|
[ drop _ get-ast ast>dfa dfa>word ] when
|
||||||
ast>dfa dfa>reverse-quotation
|
] change-dfa ;
|
||||||
] unless*
|
|
||||||
] change-reverse-dfa ;
|
|
||||||
|
|
||||||
M: regexp match-index-from
|
M: regexp compile-regexp ( regexp -- regexp )
|
||||||
compile-regexp dfa>> <quot-matcher> match-index-from ;
|
do-compile-regexp ;
|
||||||
|
|
||||||
M: reverse-matcher match-index-from
|
M: reverse-regexp compile-regexp ( regexp -- regexp )
|
||||||
regexp>> compile-reverse reverse-dfa>>
|
t backwards? [ do-compile-regexp ] with-variable ;
|
||||||
<quot-matcher> match-index-from ;
|
|
||||||
|
GENERIC: compile-next-match ( regexp -- regexp )
|
||||||
|
|
||||||
|
: next-initial-word ( i string regexp -- i slice/f )
|
||||||
|
compile-next-match do-next-match ;
|
||||||
|
|
||||||
|
M: regexp compile-next-match ( regexp -- regexp )
|
||||||
|
dup '[
|
||||||
|
dup \ next-initial-word = [
|
||||||
|
drop _ compile-regexp dfa>>
|
||||||
|
'[ _ '[ _ _ execute ] next-match ]
|
||||||
|
(( i string -- i match/f )) simple-define-temp
|
||||||
|
] when
|
||||||
|
] change-next-match ;
|
||||||
|
|
||||||
|
! Write M: reverse-regexp compile-next-match
|
||||||
|
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
|
: new-regexp ( string ast options class -- regexp )
|
||||||
|
[ \ regexp-initial-word \ next-initial-word ] dip boa ; inline
|
||||||
|
|
||||||
|
: make-regexp ( string ast -- regexp )
|
||||||
|
f f <options> regexp new-regexp ;
|
||||||
|
|
||||||
|
: <optioned-regexp> ( string options -- regexp )
|
||||||
|
[ dup parse-regexp ] [ string>options ] bi*
|
||||||
|
dup on>> reversed-regexp swap member?
|
||||||
|
[ reverse-regexp new-regexp ]
|
||||||
|
[ regexp new-regexp ] if ;
|
||||||
|
|
||||||
|
: <regexp> ( string -- regexp ) "" <optioned-regexp> ;
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
! The following two should do some caching
|
! The following two should do some caching
|
||||||
|
|
||||||
|
@ -97,7 +192,7 @@ M: reverse-matcher match-index-from
|
||||||
|
|
||||||
: parsing-regexp ( accum end -- accum )
|
: parsing-regexp ( accum end -- accum )
|
||||||
lexer get [ take-until ] [ parse-noblank-token ] bi
|
lexer get [ take-until ] [ parse-noblank-token ] bi
|
||||||
<optioned-regexp> compile-regexp parsed ;
|
<optioned-regexp> compile-next-match parsed ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
|
@ -120,3 +215,4 @@ M: regexp pprint*
|
||||||
[ options>> options>string % ] bi
|
[ options>> options>string % ] bi
|
||||||
] "" make
|
] "" make
|
||||||
] keep present-text ;
|
] keep present-text ;
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2006, 2008 Slava Pestov
|
! Copyright (C) 2006, 2008 Slava Pestov
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel continuations sequences math namespaces make sets
|
USING: kernel continuations sequences math namespaces make sets
|
||||||
math.parser math.ranges assocs regexp regexp.matchers unicode.categories arrays
|
math.parser math.ranges assocs regexp unicode.categories arrays
|
||||||
hashtables words classes quotations xmode.catalog unicode.case ;
|
hashtables words classes quotations xmode.catalog unicode.case ;
|
||||||
IN: validators
|
IN: validators
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
! Copyright (C) 2008 Slava Pestov.
|
! Copyright (C) 2008 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
IN: xmode.marker
|
|
||||||
USING: kernel namespaces make xmode.rules xmode.tokens
|
USING: kernel namespaces make xmode.rules xmode.tokens
|
||||||
xmode.marker.state xmode.marker.context xmode.utilities
|
xmode.marker.state xmode.marker.context xmode.utilities
|
||||||
xmode.catalog sequences math assocs combinators strings
|
xmode.catalog sequences math assocs combinators strings
|
||||||
regexp splitting ascii unicode.case regexp.matchers
|
regexp splitting unicode.case
|
||||||
ascii combinators.short-circuit accessors ;
|
combinators.short-circuit accessors ;
|
||||||
|
IN: xmode.marker
|
||||||
|
|
||||||
! Next two words copied from parser-combinators
|
! Next two words copied from parser-combinators
|
||||||
! Just like head?, but they optionally ignore case
|
! Just like head?, but they optionally ignore case
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
! Copyright (C) 2008 Slava Pestov.
|
! Copyright (C) 2008 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors regexp.matchers prettyprint io io.encodings.ascii
|
USING: accessors prettyprint io io.encodings.ascii
|
||||||
io.files kernel sequences assocs namespaces regexp ;
|
io.files kernel sequences assocs namespaces regexp ;
|
||||||
IN: benchmark.regex-dna
|
IN: benchmark.regex-dna
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue