diff --git a/core/math/parser/parser.factor b/core/math/parser/parser.factor index 78705266ee..05e267f035 100755 --- a/core/math/parser/parser.factor +++ b/core/math/parser/parser.factor @@ -147,6 +147,7 @@ M: float >base { [ dup fp-nan? ] [ drop "0.0/0.0" ] } { [ dup 1.0/0.0 = ] [ drop "1.0/0.0" ] } { [ dup -1.0/0.0 = ] [ drop "-1.0/0.0" ] } + { [ dup double>bits HEX: 8000000000000000 = ] [ drop "-0.0" ] } [ float>string fix-float ] } cond ; diff --git a/extra/regexp2/authors.txt b/unfinished/regexp2/authors.txt similarity index 100% rename from extra/regexp2/authors.txt rename to unfinished/regexp2/authors.txt diff --git a/extra/regexp2/backend/backend.factor b/unfinished/regexp2/backend/backend.factor similarity index 91% rename from extra/regexp2/backend/backend.factor rename to unfinished/regexp2/backend/backend.factor index 5f59c25bc3..c39d67e7b8 100644 --- a/extra/regexp2/backend/backend.factor +++ b/unfinished/regexp2/backend/backend.factor @@ -7,6 +7,7 @@ TUPLE: regexp raw { stack vector } parse-tree + { options hashtable } nfa-table dfa-table minimized-table @@ -18,6 +19,7 @@ TUPLE: regexp 0 >>state V{ } clone >>stack V{ } clone >>new-states + H{ } clone >>options H{ } clone >>visited-states ; SYMBOL: current-regexp diff --git a/extra/regexp2/classes/classes.factor b/unfinished/regexp2/classes/classes.factor similarity index 91% rename from extra/regexp2/classes/classes.factor rename to unfinished/regexp2/classes/classes.factor index 0862f9cb63..7737e02d40 100644 --- a/extra/regexp2/classes/classes.factor +++ b/unfinished/regexp2/classes/classes.factor @@ -21,6 +21,9 @@ M: letter-class class-member? ( obj class -- ? ) M: LETTER-class class-member? ( obj class -- ? ) drop LETTER? ; +M: Letter-class class-member? ( obj class -- ? ) + drop Letter? ; + M: ascii-class class-member? ( obj class -- ? ) drop ascii? ; @@ -47,3 +50,6 @@ M: hex-digit-class class-member? ( obj class -- ? ) M: java-blank-class class-member? ( obj class -- ? ) drop java-blank? ; + +M: unmatchable-class class-member? ( obj class -- ? ) + 2drop f ; diff --git a/extra/regexp2/dfa/dfa.factor b/unfinished/regexp2/dfa/dfa.factor similarity index 100% rename from extra/regexp2/dfa/dfa.factor rename to unfinished/regexp2/dfa/dfa.factor diff --git a/extra/regexp2/nfa/nfa.factor b/unfinished/regexp2/nfa/nfa.factor similarity index 100% rename from extra/regexp2/nfa/nfa.factor rename to unfinished/regexp2/nfa/nfa.factor diff --git a/extra/regexp2/parser/parser-tests.factor b/unfinished/regexp2/parser/parser-tests.factor similarity index 100% rename from extra/regexp2/parser/parser-tests.factor rename to unfinished/regexp2/parser/parser-tests.factor diff --git a/extra/regexp2/parser/parser.factor b/unfinished/regexp2/parser/parser.factor similarity index 80% rename from extra/regexp2/parser/parser.factor rename to unfinished/regexp2/parser/parser.factor index fc1029db58..6eda3310d0 100644 --- a/extra/regexp2/parser/parser.factor +++ b/unfinished/regexp2/parser/parser.factor @@ -1,10 +1,10 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays assocs combinators io io.streams.string -kernel math math.parser multi-methods namespaces qualified +kernel math math.parser multi-methods namespaces qualified sets quotations sequences sequences.lib splitting symbols vectors -dlists math.order combinators.lib unicode.categories -sequences.lib regexp2.backend regexp2.utils ; +dlists math.order combinators.lib unicode.categories strings +sequences.lib regexp2.backend regexp2.utils unicode.case ; IN: regexp2.parser FROM: math.ranges => [a,b] ; @@ -30,30 +30,41 @@ SINGLETON: back-anchor INSTANCE: back-anchor node TUPLE: option-on option ; INSTANCE: option-on node TUPLE: option-off option ; INSTANCE: option-off node -SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case ; -MIXIN: regexp-option -INSTANCE: unix-lines regexp-option -INSTANCE: dotall regexp-option -INSTANCE: multiline regexp-option -INSTANCE: comments regexp-option -INSTANCE: case-insensitive regexp-option -INSTANCE: unicode-case regexp-option +SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case reversed-regexp ; SINGLETONS: letter-class LETTER-class Letter-class digit-class alpha-class non-newline-blank-class ascii-class punctuation-class java-printable-class blank-class -control-character-class hex-digit-class java-blank-class c-identifier-class ; +control-character-class hex-digit-class java-blank-class c-identifier-class +unmatchable-class ; SINGLETONS: beginning-of-group end-of-group beginning-of-character-class end-of-character-class left-parenthesis pipe caret dash ; -: ( obj -- constant ) constant boa ; +: get-option ( option -- ? ) current-regexp get options>> at ; +: get-unix-lines ( -- ? ) unix-lines get-option ; +: get-dotall ( -- ? ) dotall get-option ; +: get-multiline ( -- ? ) multiline get-option ; +: get-comments ( -- ? ) comments get-option ; +: get-case-insensitive ( -- ? ) case-insensitive get-option ; +: get-unicode-case ( -- ? ) unicode-case get-option ; +: get-reversed-regexp ( -- ? ) reversed-regexp get-option ; + : ( obj -- negation ) negation boa ; -: ( seq -- concatenation ) >vector concatenation boa ; +: ( seq -- concatenation ) + >vector get-reversed-regexp [ reverse ] when + concatenation boa ; : ( seq -- alternation ) >vector alternation boa ; : ( obj -- capture-group ) capture-group boa ; : ( obj -- kleene-star ) kleene-star boa ; +: ( obj -- constant ) + dup Letter? get-case-insensitive and [ + [ ch>lower constant boa ] + [ ch>upper constant boa ] bi 2array + ] [ + constant boa + ] if ; : first|concatenation ( seq -- first/concatenation ) dup length 1 = [ first ] [ ] if ; @@ -61,6 +72,17 @@ left-parenthesis pipe caret dash ; : first|alternation ( seq -- first/alternation ) dup length 1 = [ first ] [ ] if ; +: ( from to -- obj ) + 2dup [ Letter? ] bi@ or get-case-insensitive and [ + [ [ ch>lower ] bi@ character-class-range boa ] + [ [ ch>upper ] bi@ character-class-range boa ] 2bi + 2array [ [ from>> ] [ to>> ] bi < ] filter + [ unmatchable-class ] [ first|alternation ] if-empty + ] [ + 2dup < + [ character-class-range boa ] [ 2drop unmatchable-class ] if + ] if ; + ERROR: unmatched-parentheses ; : make-positive-lookahead ( string -- ) @@ -90,24 +112,26 @@ ERROR: bad-option ch ; { CHAR: i [ case-insensitive ] } { CHAR: d [ unix-lines ] } { CHAR: m [ multiline ] } + { CHAR: r [ reversed-regexp ] } { CHAR: s [ dotall ] } { CHAR: u [ unicode-case ] } { CHAR: x [ comments ] } [ bad-option ] } case ; - -: option-on ( ch -- ) option \ option-on boa push-stack ; -: option-off ( ch -- ) option \ option-off boa push-stack ; -: toggle-option ( ch ? -- ) [ option-on ] [ option-off ] if ; + +: option-on ( option -- ) current-regexp get options>> conjoin ; +: option-off ( option -- ) current-regexp get options>> delete-at ; + +: toggle-option ( ch ? -- ) [ option ] dip [ option-on ] [ option-off ] if ; : (parse-options) ( string ? -- ) [ toggle-option ] curry each ; : parse-options ( string -- ) "-" split1 [ t (parse-options) ] [ f (parse-options) ] bi* ; DEFER: (parse-regexp) -: parse-special-group-options ( options -- ) +: parse-special-group ( -- ) beginning-of-group push-stack - parse-options (parse-regexp) pop-stack make-non-capturing-group ; + (parse-regexp) pop-stack make-non-capturing-group ; ERROR: bad-special-group string ; @@ -126,8 +150,13 @@ ERROR: bad-special-group string ; { [ dup CHAR: < = peek1 CHAR: ! = and ] [ drop read1 drop nested-parse-regexp pop-stack make-negative-lookbehind ] } [ - ":" read-until [ bad-special-group ] unless - swap prefix parse-special-group-options + ":)" read-until + [ swap prefix ] dip + { + { CHAR: : [ parse-options parse-special-group ] } + { CHAR: ) [ parse-options ] } + [ drop bad-special-group ] + } case ] } cond ; @@ -193,10 +222,10 @@ ERROR: expected-posix-class ; read1 CHAR: { = [ expected-posix-class ] unless "}" read-until [ bad-character-class ] unless { - { "Lower" [ letter-class ] } - { "Upper" [ LETTER-class ] } - { "ASCII" [ ascii-class ] } + { "Lower" [ get-case-insensitive Letter-class letter-class ? ] } + { "Upper" [ get-case-insensitive Letter-class LETTER-class ? ] } { "Alpha" [ Letter-class ] } + { "ASCII" [ ascii-class ] } { "Digit" [ digit-class ] } { "Alnum" [ alpha-class ] } { "Punct" [ punctuation-class ] } @@ -250,6 +279,13 @@ ERROR: bad-escaped-literals seq ; { CHAR: 0 [ parse-octal ] } { CHAR: c [ parse-control-character ] } + ! { CHAR: b [ handle-word-boundary ] } + ! { CHAR: B [ handle-word-boundary ] } + ! { CHAR: A [ handle-beginning-of-input ] } + ! { CHAR: G [ end of previous match ] } + ! { CHAR: Z [ handle-end-of-input ] } + ! { CHAR: z [ handle-end-of-input ] } ! except for terminator + { CHAR: Q [ parse-escaped-literals ] } } case ; @@ -273,7 +309,7 @@ ERROR: bad-escaped-literals seq ; handle-dash handle-caret ; : apply-dash ( -- ) - stack [ pop3 nip character-class-range boa ] keep push ; + stack [ pop3 nip ] keep push ; : apply-dash? ( -- ? ) stack dup length 3 >= @@ -312,16 +348,9 @@ DEFER: handle-left-bracket beginning-of-character-class push-stack parse-character-class-first (parse-character-class) ; -ERROR: empty-regexp ; : finish-regexp-parse ( stack -- obj ) - dup length { - { 0 [ empty-regexp ] } - { 1 [ first ] } - [ - drop { pipe } split - [ first|concatenation ] map first|alternation - ] - } case ; + { pipe } split + [ first|concatenation ] map first|alternation ; : handle-right-parenthesis ( -- ) stack beginning-of-group over last-index cut rest diff --git a/unfinished/regexp2/regexp2-docs.factor b/unfinished/regexp2/regexp2-docs.factor new file mode 100644 index 0000000000..f903c14bc4 --- /dev/null +++ b/unfinished/regexp2/regexp2-docs.factor @@ -0,0 +1,14 @@ +! Copyright (C) 2008 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel strings help.markup help.syntax regexp2.backend ; +IN: regexp2 + +HELP: +{ $values { "string" string } { "regexp" regexp } } +{ $description "Compiles a regular expression into a DFA and returns this object. Regular expressions only have to be compiled once and can then be used multiple times to match input strings." } ; + +HELP: +{ $values { "string" string } { "regexp" regexp } } +{ $description "Compiles a case-insensitive regular expression into a DFA and returns this object. Otherwise, exactly the same as " { $link } } ; + +{ } related-words diff --git a/extra/regexp2/regexp2-tests.factor b/unfinished/regexp2/regexp2-tests.factor similarity index 89% rename from extra/regexp2/regexp2-tests.factor rename to unfinished/regexp2/regexp2-tests.factor index 2b34fe6e77..54626ea165 100644 --- a/extra/regexp2/regexp2-tests.factor +++ b/unfinished/regexp2/regexp2-tests.factor @@ -1,4 +1,5 @@ -USING: regexp2 tools.test kernel regexp2.traversal ; +USING: regexp2 tools.test kernel sequences regexp2.parser +regexp2.traversal ; IN: regexp2-tests [ f ] [ "b" "a*" matches? ] unit-test @@ -151,7 +152,7 @@ IN: regexp2-tests [ f ] [ "abc" "[\\p{Upper}]{3}" matches? ] unit-test [ t ] [ "ABC" "[\\p{Upper}]{3}" matches? ] unit-test -[ t ] [ "" "\\Q\\E" matches? ] unit-test +[ f ] [ "" "\\Q\\E" matches? ] unit-test [ f ] [ "a" "\\Q\\E" matches? ] unit-test [ t ] [ "|*+" "\\Q|*+\\E" matches? ] unit-test [ f ] [ "abc" "\\Q|*+\\E" matches? ] unit-test @@ -203,6 +204,8 @@ IN: regexp2-tests drop ] unit-test +[ "{Lower}" ] [ invalid-range? ] must-fail-with + [ t ] [ "fxxbar" "(?!foo).{3}bar" matches? ] unit-test [ f ] [ "foobar" "(?!foo).{3}bar" matches? ] unit-test @@ -226,9 +229,29 @@ IN: regexp2-tests ! [ t ] [ "fooxbar" "foo\\Bxbar" matches? ] unit-test ! [ f ] [ "foo" "foo\\Bbar" matches? ] unit-test -! [ t ] [ "s@f" "[a-z.-]@[a-z]" matches? ] unit-test -! [ f ] [ "a" "[a-z.-]@[a-z]" matches? ] unit-test -! [ t ] [ ".o" "\\.[a-z]" matches? ] unit-test +[ t ] [ "s@f" "[a-z.-]@[a-z]" matches? ] unit-test +[ f ] [ "a" "[a-z.-]@[a-z]" matches? ] unit-test +[ t ] [ ".o" "\\.[a-z]" matches? ] unit-test + +[ t ] [ "a" "(?i)a" matches? ] unit-test +[ t ] [ "a" "(?i)a" matches? ] unit-test +[ t ] [ "A" "(?i)a" matches? ] unit-test +[ t ] [ "A" "(?i)a" matches? ] unit-test + +[ t ] [ "a" "(?-i)a" matches? ] unit-test +[ t ] [ "a" "(?-i)a" matches? ] unit-test +[ f ] [ "A" "(?-i)a" matches? ] unit-test +[ f ] [ "A" "(?-i)a" matches? ] unit-test + +[ f ] [ "A" "[a-z]" matches? ] unit-test +[ t ] [ "A" "[a-z]" matches? ] unit-test + +[ f ] [ "A" "\\p{Lower}" matches? ] unit-test +[ t ] [ "A" "\\p{Lower}" matches? ] unit-test + +[ t ] [ "abc" "abc" matches? ] unit-test +[ t ] [ "abc" "a[bB][cC]" matches? ] unit-test +[ t ] [ "adcbe" "a(?r)(bcd)(?-r)e" matches? ] unit-test ! Bug in parsing word ! [ t ] [ "a" R' a' matches? ] unit-test diff --git a/extra/regexp2/regexp2.factor b/unfinished/regexp2/regexp2.factor similarity index 52% rename from extra/regexp2/regexp2.factor rename to unfinished/regexp2/regexp2.factor index 0f15b3c1ec..0b8994ca2b 100644 --- a/extra/regexp2/regexp2.factor +++ b/unfinished/regexp2/regexp2.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors combinators kernel regexp2.backend regexp2.utils -regexp2.parser regexp2.nfa regexp2.dfa regexp2.traversal state-tables +USING: accessors combinators kernel math math.ranges +sequences regexp2.backend regexp2.utils memoize sets +regexp2.parser regexp2.nfa regexp2.dfa regexp2.traversal regexp2.transition-tables ; IN: regexp2 @@ -13,8 +14,7 @@ IN: regexp2 >>minimized-table reset-regexp ; -: ( string -- regexp ) - default-regexp +: construct-regexp ( regexp -- regexp' ) { [ parse-regexp ] [ construct-nfa ] @@ -22,6 +22,30 @@ IN: regexp2 [ ] } cleave ; +: match ( string regexp -- pair ) + do-match return-match ; + +: matches? ( string regexp -- ? ) + dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ; + +: match-head ( string regexp -- end ) match length>> 1- ; + +: initial-option ( regexp option -- regexp' ) + over options>> conjoin ; + +: ( string -- regexp ) + default-regexp construct-regexp ; + +: ( string -- regexp ) + default-regexp + case-insensitive initial-option + construct-regexp ; + +: ( string -- regexp ) + default-regexp + reversed-regexp initial-option + construct-regexp ; + : R! CHAR: ! ; parsing : R" CHAR: " ; parsing : R# CHAR: # ; parsing diff --git a/extra/regexp2/summary.txt b/unfinished/regexp2/summary.txt similarity index 100% rename from extra/regexp2/summary.txt rename to unfinished/regexp2/summary.txt diff --git a/extra/regexp2/tags.txt b/unfinished/regexp2/tags.txt similarity index 100% rename from extra/regexp2/tags.txt rename to unfinished/regexp2/tags.txt diff --git a/extra/regexp2/transition-tables/transition-tables.factor b/unfinished/regexp2/transition-tables/transition-tables.factor similarity index 100% rename from extra/regexp2/transition-tables/transition-tables.factor rename to unfinished/regexp2/transition-tables/transition-tables.factor diff --git a/extra/regexp2/traversal/traversal.factor b/unfinished/regexp2/traversal/traversal.factor similarity index 90% rename from extra/regexp2/traversal/traversal.factor rename to unfinished/regexp2/traversal/traversal.factor index 2fbdc49a2a..94e96bb935 100644 --- a/extra/regexp2/traversal/traversal.factor +++ b/unfinished/regexp2/traversal/traversal.factor @@ -78,11 +78,3 @@ TUPLE: dfa-traverser dup matches>> [ drop f ] [ [ start-index>> ] [ peek ] bi* 1 ] if-empty ; - -: match ( string regexp -- pair ) - do-match return-match ; - -: matches? ( string regexp -- ? ) - dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ; - -: match-head ( string regexp -- end ) match length>> 1- ; diff --git a/extra/regexp2/utils/utils.factor b/unfinished/regexp2/utils/utils.factor similarity index 100% rename from extra/regexp2/utils/utils.factor rename to unfinished/regexp2/utils/utils.factor diff --git a/unmaintained/random-tester/random-tester.factor b/unmaintained/random-tester/random-tester.factor index 7fb1714860..2b2559e02a 100755 --- a/unmaintained/random-tester/random-tester.factor +++ b/unmaintained/random-tester/random-tester.factor @@ -8,7 +8,7 @@ SYMBOL: errored SYMBOL: before SYMBOL: after SYMBOL: quot -TUPLE: random-tester-error ; +ERROR: random-tester-error ; : setup-test ( #data #code -- data... quot ) #! Variable stack effect @@ -35,7 +35,7 @@ TUPLE: random-tester-error ; "--" print [ . ] each quot get . - random-tester-error construct-empty throw + random-tester-error ] if ] unless clear ; diff --git a/unmaintained/random-tester/safe-words/safe-words.factor b/unmaintained/random-tester/safe-words/safe-words.factor index 5ca2c79afe..7d8adcbc2a 100755 --- a/unmaintained/random-tester/safe-words/safe-words.factor +++ b/unmaintained/random-tester/safe-words/safe-words.factor @@ -1,5 +1,7 @@ -USING: kernel namespaces sequences sorting vocabs ; -USING: arrays assocs generic hashtables math math.intervals math.parser math.functions refs shuffle vectors words ; +USING: kernel namespaces sequences sets sorting vocabs ; +USING: arrays assocs generic hashtables +math math.intervals math.parser math.order math.functions +refs shuffle vectors words ; IN: random-tester.safe-words : ?-words @@ -16,7 +18,11 @@ IN: random-tester.safe-words array? integer? complex? value-ref? ref? key-ref? interval? number? wrapper? tuple? - [-1,1]? between? bignum? both? either? eq? equal? even? fixnum? float? fp-nan? hashtable? interval-contains? interval-subset? interval? key-ref? key? number? odd? pair? power-of-2? ratio? rational? real? subassoc? zero? assoc? curry? vector? callstack? ! clear 3.14 [ assoc? ] compile-1 + [-1,1]? between? bignum? both? either? eq? equal? even? fixnum? + float? fp-nan? hashtable? interval-contains? interval-subset? + interval? key-ref? key? number? odd? pair? power-of-2? + ratio? rational? real? zero? assoc? curry? vector? callstack? + 2^ not ! arrays resize-array @@ -64,6 +70,9 @@ IN: random-tester.safe-words retainstack callstack datastack callstack>array + + curry 2curry 3curry compose 3compose + (assoc-each) } ; : exit-words @@ -83,28 +92,31 @@ IN: random-tester.safe-words ] { } make ; : safe-words ( -- array ) - bad-words { - "alists" "arrays" "assocs" ! "bit-arrays" "byte-arrays" + { + ! "accessors" + "alists" "arrays" "assocs" "bit-arrays" "byte-arrays" ! "classes" "combinators" "compiler" "continuations" ! "core-foundation" "definitions" "documents" ! "float-arrays" "generic" "graphs" "growable" "hashtables" ! io.* - "kernel" "math" + "kernel" "math" "math.bitfields" "math.complex" "math.constants" "math.floats" "math.functions" "math.integers" "math.intervals" "math.libm" - "math.parser" "math.ratios" "math.vectors" - ! "namespaces" "quotations" "sbufs" + "math.parser" "math.order" "math.ratios" "math.vectors" + ! "namespaces" + "quotations" "sbufs" ! "queues" "strings" "sequences" + "sets" "vectors" ! "words" - } [ words ] map concat seq-diff natural-sort ; + } [ words ] map concat bad-words diff natural-sort ; safe-words \ safe-words set-global ! foo dup (clone) = . ! foo dup clone = . ! f [ byte-array>bignum assoc-clone-like ] compile-1 -! 2 3.14 [ construct-empty number= ] compile-1 +! 2 3.14 [ number= ] compile-1 ! 3.14 [ assoc? ] compile-1 ! -3 [ ] 2 [ byte-array>bignum denominator ] compile-1 - +! : foo ( x -- y ) euler bitand ; { foo } compile 20 foo