Merge branch 'master' of git://factorcode.org/git/factor
commit
6feb869f66
|
@ -95,8 +95,9 @@ IN: regexp4-tests
|
|||
|
||||
[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
|
||||
|
||||
[ "^" "[^]" <regexp> matches? ] must-fail
|
||||
[ t ] [ "^" "[^]" <regexp> matches? ] must-fail
|
||||
[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
|
||||
|
||||
|
|
|
@ -15,8 +15,7 @@ SYMBOL: runtime-epsilon
|
|||
|
||||
TUPLE: regexp raw parentheses-count bracket-count
|
||||
state stack nfa new-states dfa minimized-dfa
|
||||
dot-matches-newlines? character-sets capture-group
|
||||
captured-groups ;
|
||||
dot-matches-newlines? capture-group captured-groups ;
|
||||
|
||||
TUPLE: capture-group n range ;
|
||||
|
||||
|
@ -336,11 +335,26 @@ ERROR: bad-hex number ;
|
|||
[ dup digit? [ parse-backreference ] [ make-nontoken-nfa ] if ]
|
||||
} case ;
|
||||
|
||||
: make-character-set ( regexp -- )
|
||||
: handle-dash ( vector -- vector )
|
||||
[ dup dash eq? [ drop CHAR: - ] when ] map ;
|
||||
|
||||
ERROR: unmatched-negated-character-class class ;
|
||||
|
||||
: handle-caret ( vector -- vector ? )
|
||||
dup [ length 2 >= ] [ first caret eq? ] bi and [
|
||||
rest t
|
||||
] [
|
||||
f
|
||||
] if ;
|
||||
|
||||
: make-character-class ( regexp -- )
|
||||
left-bracket over stack>> cut-stack
|
||||
pick (>>stack)
|
||||
[ dup number? [ '[ dup , = ] ] when ] map
|
||||
[ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry
|
||||
handle-dash
|
||||
handle-caret
|
||||
>r [ dup number? [ '[ dup , = ] ] when ] map
|
||||
[ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry r>
|
||||
[ [ not ] compose ] when
|
||||
make-nontoken-nfa ;
|
||||
|
||||
: apply-dash ( regexp -- )
|
||||
|
@ -351,18 +365,18 @@ ERROR: bad-hex number ;
|
|||
stack>> dup length 3 >=
|
||||
[ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
|
||||
|
||||
DEFER: parse-character-set
|
||||
: (parse-character-set) ( regexp -- )
|
||||
DEFER: parse-character-class
|
||||
: (parse-character-class) ( regexp -- )
|
||||
[
|
||||
next get-char
|
||||
{
|
||||
{ CHAR: [ [
|
||||
[ 1+ ] change-bracket-count left-bracket push-stack
|
||||
parse-character-set
|
||||
parse-character-class
|
||||
] }
|
||||
{ CHAR: ] [
|
||||
[ 1- ] change-bracket-count
|
||||
make-character-set
|
||||
make-character-class
|
||||
] }
|
||||
{ CHAR: - [ dash push-stack ] }
|
||||
! { CHAR: & [ ampersand push-stack ] }
|
||||
|
@ -373,20 +387,30 @@ DEFER: parse-character-set
|
|||
} case
|
||||
] [
|
||||
dup bracket-count>> 0 >
|
||||
[ (parse-character-set) ] [ drop ] if
|
||||
[ (parse-character-class) ] [ drop ] if
|
||||
] bi ;
|
||||
|
||||
: parse-character-set-first ( regexp -- )
|
||||
: parse-character-class-second ( regexp -- )
|
||||
get-next
|
||||
{
|
||||
{ CHAR: ^ [ caret push-stack next ] }
|
||||
{ CHAR: [ [ CHAR: [ make-nontoken-nfa next ] }
|
||||
{ CHAR: ] [ CHAR: ] make-nontoken-nfa next ] }
|
||||
! { CHAR: [ [ CHAR: [ push-stack next ] }
|
||||
{ CHAR: ] [ CHAR: ] push-stack next ] }
|
||||
{ CHAR: - [ CHAR: - push-stack next ] }
|
||||
[ 2drop ]
|
||||
} case ;
|
||||
|
||||
: parse-character-set ( regexp -- )
|
||||
[ parse-character-set-first ] [ (parse-character-set) ] bi ;
|
||||
: parse-character-class-first ( regexp -- )
|
||||
get-next
|
||||
{
|
||||
{ CHAR: ^ [ caret dupd push-stack next parse-character-class-second ] }
|
||||
! { CHAR: [ [ CHAR: [ push-stack next ] }
|
||||
{ CHAR: ] [ CHAR: ] push-stack next ] }
|
||||
{ CHAR: - [ CHAR: - push-stack next ] }
|
||||
[ 2drop ]
|
||||
} case ;
|
||||
|
||||
: parse-character-class ( regexp -- )
|
||||
[ parse-character-class-first ] [ (parse-character-class) ] bi ;
|
||||
|
||||
ERROR: unsupported-token token ;
|
||||
: parse-token ( regexp token -- )
|
||||
|
@ -404,7 +428,7 @@ ERROR: unsupported-token token ;
|
|||
{ CHAR: [ [
|
||||
drop
|
||||
dup left-bracket push-stack
|
||||
[ 1+ ] change-bracket-count parse-character-set
|
||||
[ 1+ ] change-bracket-count parse-character-class
|
||||
] }
|
||||
! { CHAR: } [ drop drop "brace" ] }
|
||||
! { CHAR: ? [ drop ] }
|
||||
|
@ -517,7 +541,6 @@ ERROR: unsupported-token token ;
|
|||
0 >>bracket-count
|
||||
-1 >>state
|
||||
V{ } clone >>stack
|
||||
V{ } clone >>character-sets
|
||||
<vector-table> >>nfa
|
||||
dup [ parse-raw-regexp ] [ subset-construction ] bi ;
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
USING: kernel tables tools.test ;
|
||||
IN: tables.tests
|
||||
USING: kernel state-tables tools.test ;
|
||||
IN: state-tables.tests
|
||||
|
||||
: test-table
|
||||
<table>
|
||||
|
@ -11,12 +11,17 @@ IN: tables.tests
|
|||
"b" "s" "u" <entry> over set-entry ;
|
||||
|
||||
[
|
||||
T{ table f
|
||||
H{
|
||||
{ "a" H{ { "l" "x" } { "c" "z" } { "o" "y" } } }
|
||||
{ "b" H{ { "l" "x" } { "s" "u" } { "o" "y" } } }
|
||||
T{
|
||||
table
|
||||
f
|
||||
H{
|
||||
{ "a" H{ { "l" "x" } { "c" "z" } { "o" "y" } } }
|
||||
{ "b" H{ { "l" "x" } { "s" "u" } { "o" "y" } } }
|
||||
}
|
||||
H{ { "l" t } { "s" t } { "c" t } { "o" t } }
|
||||
f
|
||||
H{ }
|
||||
}
|
||||
H{ { "l" t } { "s" t } { "c" t } { "o" t } } }
|
||||
] [ test-table ] unit-test
|
||||
|
||||
[ "x" t ] [ "a" "l" test-table get-entry ] unit-test
|
||||
|
@ -27,13 +32,13 @@ IN: tables.tests
|
|||
|
||||
: vector-test-table
|
||||
<vector-table>
|
||||
"a" "c" "z" <entry> over add-value
|
||||
"a" "c" "r" <entry> over add-value
|
||||
"a" "o" "y" <entry> over add-value
|
||||
"a" "l" "x" <entry> over add-value
|
||||
"b" "o" "y" <entry> over add-value
|
||||
"b" "l" "x" <entry> over add-value
|
||||
"b" "s" "u" <entry> over add-value ;
|
||||
"a" "c" "z" <entry> over add-entry
|
||||
"a" "c" "r" <entry> over add-entry
|
||||
"a" "o" "y" <entry> over add-entry
|
||||
"a" "l" "x" <entry> over add-entry
|
||||
"b" "o" "y" <entry> over add-entry
|
||||
"b" "l" "x" <entry> over add-entry
|
||||
"b" "s" "u" <entry> over add-entry ;
|
||||
|
||||
[
|
||||
T{ vector-table f
|
||||
|
@ -44,6 +49,8 @@ T{ vector-table f
|
|||
H{ { "l" "x" } { "s" "u" } { "o" "y" } } }
|
||||
}
|
||||
H{ { "l" t } { "s" t } { "c" t } { "o" t } }
|
||||
f
|
||||
H{ }
|
||||
}
|
||||
] [ vector-test-table ] unit-test
|
||||
|
||||
|
|
Loading…
Reference in New Issue