case-insensitive for constants
parent
88134e539f
commit
9eba6c0034
|
@ -7,10 +7,10 @@ TUPLE: regexp
|
||||||
raw
|
raw
|
||||||
{ stack vector }
|
{ stack vector }
|
||||||
parse-tree
|
parse-tree
|
||||||
|
{ options hashtable }
|
||||||
nfa-table
|
nfa-table
|
||||||
dfa-table
|
dfa-table
|
||||||
minimized-table
|
minimized-table
|
||||||
case-insensitive
|
|
||||||
{ state integer }
|
{ state integer }
|
||||||
{ new-states vector }
|
{ new-states vector }
|
||||||
{ visited-states hashtable } ;
|
{ visited-states hashtable } ;
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
! Copyright (C) 2008 Doug Coleman.
|
! Copyright (C) 2008 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs combinators io io.streams.string
|
USING: accessors arrays assocs combinators io io.streams.string
|
||||||
kernel math math.parser multi-methods namespaces qualified
|
kernel math math.parser multi-methods namespaces qualified sets
|
||||||
quotations sequences sequences.lib splitting symbols vectors
|
quotations sequences sequences.lib splitting symbols vectors
|
||||||
dlists math.order combinators.lib unicode.categories
|
dlists math.order combinators.lib unicode.categories strings
|
||||||
sequences.lib regexp2.backend regexp2.utils ;
|
sequences.lib regexp2.backend regexp2.utils unicode.case ;
|
||||||
IN: regexp2.parser
|
IN: regexp2.parser
|
||||||
|
|
||||||
FROM: math.ranges => [a,b] ;
|
FROM: math.ranges => [a,b] ;
|
||||||
|
@ -48,12 +48,26 @@ SINGLETONS: beginning-of-group end-of-group
|
||||||
beginning-of-character-class end-of-character-class
|
beginning-of-character-class end-of-character-class
|
||||||
left-parenthesis pipe caret dash ;
|
left-parenthesis pipe caret dash ;
|
||||||
|
|
||||||
: <constant> ( obj -- constant ) constant boa ;
|
: get-option ( option -- ? ) current-regexp get options>> at ;
|
||||||
|
: get-unix-lines ( -- ? ) unix-lines get-option ;
|
||||||
|
: get-dotall ( -- ? ) dotall get-option ;
|
||||||
|
: get-multiline ( -- ? ) multiline get-option ;
|
||||||
|
: get-comments ( -- ? ) comments get-option ;
|
||||||
|
: get-case-insensitive ( -- ? ) case-insensitive get-option ;
|
||||||
|
: get-unicode-case ( -- ? ) unicode-case get-option ;
|
||||||
|
|
||||||
: <negation> ( obj -- negation ) negation boa ;
|
: <negation> ( obj -- negation ) negation boa ;
|
||||||
: <concatenation> ( seq -- concatenation ) >vector concatenation boa ;
|
: <concatenation> ( seq -- concatenation ) >vector concatenation boa ;
|
||||||
: <alternation> ( seq -- alternation ) >vector alternation boa ;
|
: <alternation> ( seq -- alternation ) >vector alternation boa ;
|
||||||
: <capture-group> ( obj -- capture-group ) capture-group boa ;
|
: <capture-group> ( obj -- capture-group ) capture-group boa ;
|
||||||
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
|
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
|
||||||
|
: <constant> ( obj -- constant )
|
||||||
|
dup Letter? get-case-insensitive and [
|
||||||
|
[ ch>lower constant boa ]
|
||||||
|
[ ch>upper constant boa ] bi 2array <alternation>
|
||||||
|
] [
|
||||||
|
constant boa
|
||||||
|
] if ;
|
||||||
|
|
||||||
: first|concatenation ( seq -- first/concatenation )
|
: first|concatenation ( seq -- first/concatenation )
|
||||||
dup length 1 = [ first ] [ <concatenation> ] if ;
|
dup length 1 = [ first ] [ <concatenation> ] if ;
|
||||||
|
@ -96,18 +110,19 @@ ERROR: bad-option ch ;
|
||||||
[ bad-option ]
|
[ bad-option ]
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
: option-on ( ch -- ) option \ option-on boa push-stack ;
|
: option-on ( option -- ) current-regexp get options>> conjoin ;
|
||||||
: option-off ( ch -- ) option \ option-off boa push-stack ;
|
: option-off ( option -- ) current-regexp get options>> delete-at ;
|
||||||
: toggle-option ( ch ? -- ) [ option-on ] [ option-off ] if ;
|
|
||||||
|
: toggle-option ( ch ? -- ) [ option ] dip [ option-on ] [ option-off ] if ;
|
||||||
: (parse-options) ( string ? -- ) [ toggle-option ] curry each ;
|
: (parse-options) ( string ? -- ) [ toggle-option ] curry each ;
|
||||||
|
|
||||||
: parse-options ( string -- )
|
: parse-options ( string -- )
|
||||||
"-" split1 [ t (parse-options) ] [ f (parse-options) ] bi* ;
|
"-" split1 [ t (parse-options) ] [ f (parse-options) ] bi* ;
|
||||||
|
|
||||||
DEFER: (parse-regexp)
|
DEFER: (parse-regexp)
|
||||||
: parse-special-group-options ( options -- )
|
: parse-special-group ( -- )
|
||||||
beginning-of-group push-stack
|
beginning-of-group push-stack
|
||||||
parse-options (parse-regexp) pop-stack make-non-capturing-group ;
|
(parse-regexp) pop-stack make-non-capturing-group ;
|
||||||
|
|
||||||
ERROR: bad-special-group string ;
|
ERROR: bad-special-group string ;
|
||||||
|
|
||||||
|
@ -126,8 +141,13 @@ ERROR: bad-special-group string ;
|
||||||
{ [ dup CHAR: < = peek1 CHAR: ! = and ]
|
{ [ dup CHAR: < = peek1 CHAR: ! = and ]
|
||||||
[ drop read1 drop nested-parse-regexp pop-stack make-negative-lookbehind ] }
|
[ drop read1 drop nested-parse-regexp pop-stack make-negative-lookbehind ] }
|
||||||
[
|
[
|
||||||
":" read-until [ bad-special-group ] unless
|
":)" read-until
|
||||||
swap prefix parse-special-group-options
|
[ swap prefix ] dip
|
||||||
|
{
|
||||||
|
{ CHAR: : [ parse-options parse-special-group ] }
|
||||||
|
{ CHAR: ) [ parse-options ] }
|
||||||
|
[ drop bad-special-group ]
|
||||||
|
} case
|
||||||
]
|
]
|
||||||
} cond ;
|
} cond ;
|
||||||
|
|
||||||
|
@ -312,10 +332,8 @@ DEFER: handle-left-bracket
|
||||||
beginning-of-character-class push-stack
|
beginning-of-character-class push-stack
|
||||||
parse-character-class-first (parse-character-class) ;
|
parse-character-class-first (parse-character-class) ;
|
||||||
|
|
||||||
ERROR: empty-regexp ;
|
|
||||||
: finish-regexp-parse ( stack -- obj )
|
: finish-regexp-parse ( stack -- obj )
|
||||||
dup length {
|
dup length {
|
||||||
{ 0 [ empty-regexp ] }
|
|
||||||
{ 1 [ first ] }
|
{ 1 [ first ] }
|
||||||
[
|
[
|
||||||
drop { pipe } split
|
drop { pipe } split
|
||||||
|
|
|
@ -151,7 +151,7 @@ IN: regexp2-tests
|
||||||
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
[ t ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
|
[ f ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
|
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
|
||||||
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2008 Doug Coleman.
|
! Copyright (C) 2008 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors combinators kernel math math.ranges
|
USING: accessors combinators kernel math math.ranges
|
||||||
sequences regexp2.backend regexp2.utils memoize
|
sequences regexp2.backend regexp2.utils memoize sets
|
||||||
regexp2.parser regexp2.nfa regexp2.dfa regexp2.traversal
|
regexp2.parser regexp2.nfa regexp2.dfa regexp2.traversal
|
||||||
regexp2.transition-tables ;
|
regexp2.transition-tables ;
|
||||||
IN: regexp2
|
IN: regexp2
|
||||||
|
@ -30,12 +30,15 @@ IN: regexp2
|
||||||
|
|
||||||
: match-head ( string regexp -- end ) match length>> 1- ;
|
: match-head ( string regexp -- end ) match length>> 1- ;
|
||||||
|
|
||||||
MEMO: <regexp> ( string -- regexp )
|
: initial-option ( regexp option -- regexp' )
|
||||||
|
over options>> conjoin ;
|
||||||
|
|
||||||
|
: <regexp> ( string -- regexp )
|
||||||
default-regexp construct-regexp ;
|
default-regexp construct-regexp ;
|
||||||
|
|
||||||
MEMO: <iregexp> ( string -- regexp )
|
: <iregexp> ( string -- regexp )
|
||||||
default-regexp
|
default-regexp
|
||||||
t >>case-insensitive
|
case-insensitive initial-option
|
||||||
construct-regexp ;
|
construct-regexp ;
|
||||||
|
|
||||||
: R! CHAR: ! <regexp> ; parsing
|
: R! CHAR: ! <regexp> ; parsing
|
||||||
|
|
Loading…
Reference in New Issue