diff --git a/basis/compiler/tests/stack-trace.factor b/basis/compiler/tests/stack-trace.factor index cfbea3bcb9..b317ed3eb5 100755 --- a/basis/compiler/tests/stack-trace.factor +++ b/basis/compiler/tests/stack-trace.factor @@ -14,7 +14,7 @@ words splitting grouping sorting accessors ; [ t ] [ symbolic-stack-trace [ word? ] filter - { baz bar foo throw } tail? + { baz bar foo } tail? ] unit-test : bleh ( seq -- seq' ) [ 3 + ] map [ 0 > ] filter ; diff --git a/basis/generalizations/generalizations-docs.factor b/basis/generalizations/generalizations-docs.factor index 376ae5bed2..2088e468c6 100644 --- a/basis/generalizations/generalizations-docs.factor +++ b/basis/generalizations/generalizations-docs.factor @@ -58,7 +58,7 @@ HELP: npick "placed on the top of the stack." } { $examples - { $example "USING: kernel prettyprint generalizations ;" "1 2 3 4 4 npick .s clear" "1\n2\n3\n4\n1" } + { $example "USING: kernel prettyprint generalizations ;" "1 2 3 4 4 npick 5 narray ." "{ 1 2 3 4 1 }" } "Some core words expressed in terms of " { $link npick } ":" { $table { { $link dup } { $snippet "1 npick" } } @@ -75,7 +75,7 @@ HELP: ndup "placed on the top of the stack." } { $examples - { $example "USING: prettyprint generalizations kernel ;" "1 2 3 4 4 ndup .s clear" "1\n2\n3\n4\n1\n2\n3\n4" } + { $example "USING: prettyprint generalizations kernel ;" "1 2 3 4 4 ndup 8 narray ." "{ 1 2 3 4 1 2 3 4 }" } "Some core words expressed in terms of " { $link ndup } ":" { $table { { $link dup } { $snippet "1 ndup" } } @@ -91,7 +91,7 @@ HELP: nnip "for any number of items." } { $examples - { $example "USING: prettyprint generalizations kernel ;" "1 2 3 4 3 nnip .s clear" "4" } + { $example "USING: prettyprint generalizations kernel ;" "1 2 3 4 3 nnip ." "4" } "Some core words expressed in terms of " { $link nnip } ":" { $table { { $link nip } { $snippet "1 nnip" } } @@ -106,7 +106,7 @@ HELP: ndrop "for any number of items." } { $examples - { $example "USING: prettyprint generalizations kernel ;" "1 2 3 4 3 ndrop .s clear" "1" } + { $example "USING: prettyprint generalizations kernel ;" "1 2 3 4 3 ndrop ." "1" } "Some core words expressed in terms of " { $link ndrop } ":" { $table { { $link drop } { $snippet "1 ndrop" } } @@ -121,7 +121,7 @@ HELP: nrot "number of items on the stack. " } { $examples - { $example "USING: prettyprint generalizations kernel ;" "1 2 3 4 4 nrot .s clear" "2\n3\n4\n1" } + { $example "USING: arrays generalizations kernel prettyprint ;" "1 2 3 4 4 nrot 4array ." "{ 2 3 4 1 }" } "Some core words expressed in terms of " { $link nrot } ":" { $table { { $link swap } { $snippet "1 nrot" } } @@ -135,7 +135,7 @@ HELP: -nrot "number of items on the stack. " } { $examples - { $example "USING: prettyprint generalizations kernel ;" "1 2 3 4 4 -nrot .s clear" "4\n1\n2\n3" } + { $example "USING: arrays generalizations kernel prettyprint ;" "1 2 3 4 4 -nrot 4array ." "{ 4 1 2 3 }" } "Some core words expressed in terms of " { $link -nrot } ":" { $table { { $link swap } { $snippet "1 -nrot" } } @@ -151,8 +151,8 @@ HELP: ndip "stack. The quotation can consume and produce any number of items." } { $examples - { $example "USING: generalizations kernel prettyprint kernel ;" "1 2 [ dup ] 1 ndip .s clear" "1\n1\n2" } - { $example "USING: generalizations kernel prettyprint kernel ;" "1 2 3 [ drop ] 2 ndip .s clear" "2\n3" } + { $example "USING: arrays generalizations kernel prettyprint ;" "1 2 [ dup ] 1 ndip 3array ." "{ 1 1 2 }" } + { $example "USING: arrays generalizations kernel prettyprint ;" "1 2 3 [ drop ] 2 ndip 2array ." "{ 2 3 }" } "Some core words expressed in terms of " { $link ndip } ":" { $table { { $link dip } { $snippet "1 ndip" } } @@ -168,7 +168,7 @@ HELP: nslip "removed from the stack, the quotation called, and the items restored." } { $examples - { $example "USING: generalizations kernel prettyprint ;" "[ 99 ] 1 2 3 4 5 5 nslip .s clear" "99\n1\n2\n3\n4\n5" } + { $example "USING: generalizations kernel prettyprint ;" "[ 99 ] 1 2 3 4 5 5 nslip 6 narray ." "{ 99 1 2 3 4 5 }" } "Some core words expressed in terms of " { $link nslip } ":" { $table { { $link slip } { $snippet "1 nslip" } } @@ -184,7 +184,7 @@ HELP: nkeep "saved, the quotation called, and the items restored." } { $examples - { $example "USING: generalizations kernel prettyprint ;" "1 2 3 4 5 [ drop drop drop drop drop 99 ] 5 nkeep .s clear" "99\n1\n2\n3\n4\n5" } + { $example "USING: generalizations kernel prettyprint ;" "1 2 3 4 5 [ drop drop drop drop drop 99 ] 5 nkeep 6 narray ." "{ 99 1 2 3 4 5 }" } "Some core words expressed in terms of " { $link nkeep } ":" { $table { { $link keep } { $snippet "1 nkeep" } } diff --git a/basis/html/streams/streams-tests.factor b/basis/html/streams/streams-tests.factor index 249861b12a..835874cbb7 100644 --- a/basis/html/streams/streams-tests.factor +++ b/basis/html/streams/streams-tests.factor @@ -61,6 +61,4 @@ M: funky url-of "http://www.funky-town.com/" swap town>> append ; [ H{ } [ ] with-nesting nl ] make-html-string ] unit-test -[ ] [ [ { 1 2 3 } describe ] with-html-writer drop ] unit-test - -[ ] [ [ \ predicate-instance? def>> . ] with-html-writer drop ] unit-test +[ ] [ [ { 1 2 3 } describe ] with-html-writer drop ] unit-test \ No newline at end of file diff --git a/basis/http/http.factor b/basis/http/http.factor index bf58f5c238..2b68edfb8e 100755 --- a/basis/http/http.factor +++ b/basis/http/http.factor @@ -5,8 +5,7 @@ sequences splitting sorting sets strings vectors hashtables quotations arrays byte-arrays math.parser calendar calendar.format present urls fry io io.encodings io.encodings.iana io.encodings.binary -io.encodings.8-bit io.crlf -unicode.case unicode.categories +io.encodings.8-bit io.crlf ascii http.parsers base64 ; IN: http @@ -215,11 +214,10 @@ TUPLE: post-data data params content-type content-encoding ; : parse-content-type-attributes ( string -- attributes ) " " split harvest [ "=" split1 - [ >lower ] [ "\"" ?head drop "\"" ?tail drop ] bi* + "\"" ?head drop "\"" ?tail drop ] { } map>assoc ; : parse-content-type ( content-type -- type encoding ) ";" split1 - parse-content-type-attributes "charset" swap at - [ name>encoding ] - [ dup "text/" head? latin1 binary ? ] if* ; + parse-content-type-attributes "charset" swap at name>encoding + [ dup "text/" head? latin1 binary ? ] unless* ; diff --git a/basis/io/encodings/iana/iana-docs.factor b/basis/io/encodings/iana/iana-docs.factor index c565d79ef5..628bceac62 100644 --- a/basis/io/encodings/iana/iana-docs.factor +++ b/basis/io/encodings/iana/iana-docs.factor @@ -9,24 +9,15 @@ ARTICLE: "io.encodings.iana" "IANA-registered encoding names" { $subsection name>encoding } { $subsection encoding>name } "To let a new encoding be used with the above words, use the following:" -{ $subsection register-encoding } -"Exceptions when encodings or names are not found:" -{ $subsection missing-encoding } -{ $subsection missing-name } ; - -HELP: missing-encoding -{ $error-description "The error called from " { $link name>encoding } " when there is no encoding descriptor registered corresponding to the given name." } ; - -HELP: missing-name -{ $error-description "The error called from " { $link encoding>name } " when there is no name registered corresponding to the given encoding." } ; +{ $subsection register-encoding } ; HELP: name>encoding { $values { "name" "an encoding name" } { "encoding" "an encoding descriptor" } } -{ $description "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ; +{ $description "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $snippet "f" } " if it is not found (either not implemented in Factor or not registered)." } ; HELP: encoding>name { $values { "encoding" "an encoding descriptor" } { "name" "an encoding name" } } -{ $description "Given an encoding descriptor, return the preferred IANA name." } ; +{ $description "Given an encoding descriptor, return the preferred IANA name. If no name is found, returns " { $snippet "f" } "." } ; { name>encoding encoding>name } related-words diff --git a/basis/io/encodings/iana/iana-tests.factor b/basis/io/encodings/iana/iana-tests.factor index 3175e624ce..67b849b2b2 100644 --- a/basis/io/encodings/iana/iana-tests.factor +++ b/basis/io/encodings/iana/iana-tests.factor @@ -19,10 +19,10 @@ ebcdic-fisea "EBCDIC-FI-SE-A" register-encoding "csEBCDICFISEA" n>e-table get delete-at ebcdic-fisea e>n-table get delete-at ] unit-test -[ "EBCDIC-FI-SE-A" name>encoding ] must-fail -[ "csEBCDICFISEA" name>encoding ] must-fail -[ ebcdic-fisea encoding>name ] must-fail +[ f ] [ "EBCDIC-FI-SE-A" name>encoding ] unit-test +[ f ] [ "csEBCDICFISEA" name>encoding ] unit-test +[ f ] [ ebcdic-fisea encoding>name ] unit-test [ ebcdic-fisea "foobar" register-encoding ] must-fail -[ "foobar" name>encoding ] must-fail -[ ebcdic-fisea encoding>name ] must-fail +[ f ] [ "foobar" name>encoding ] unit-test +[ f ] [ ebcdic-fisea encoding>name ] unit-test diff --git a/basis/io/encodings/iana/iana.factor b/basis/io/encodings/iana/iana.factor index a8555ac339..b504bf854a 100644 --- a/basis/io/encodings/iana/iana.factor +++ b/basis/io/encodings/iana/iana.factor @@ -10,15 +10,11 @@ SYMBOL: e>n-table SYMBOL: aliases PRIVATE> -ERROR: missing-encoding name ; +: name>encoding ( name -- encoding/f ) + n>e-table get-global at ; -: name>encoding ( name -- encoding ) - dup n>e-table get-global at [ ] [ missing-encoding ] ?if ; - -ERROR: missing-name encoding ; - -: encoding>name ( encoding -- name ) - dup e>n-table get-global at [ ] [ missing-name ] ?if ; +: encoding>name ( encoding -- name/f ) + e>n-table get-global at ; ( -- config ) "vocab:openssl/test/server.pem" >>key-file "vocab:openssl/test/dh1024.pem" >>dh-file - "password" >>password + "password" >>password ; + +: with-test-context ( quot -- ) + swap with-secure-context ; inline diff --git a/basis/regexp/ast/ast.factor b/basis/regexp/ast/ast.factor index 1c11ed5c7d..be657227e5 100644 --- a/basis/regexp/ast/ast.factor +++ b/basis/regexp/ast/ast.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel arrays accessors fry sequences regexp.classes ; -FROM: math.ranges => [a,b] ; +USING: kernel arrays accessors fry sequences regexp.classes +math.ranges math ; IN: regexp.ast TUPLE: negation term ; @@ -49,10 +49,20 @@ SINGLETONS: unix-lines dotall multiline case-insensitive reversed-regexp ; ; GENERIC: ( term times -- term' ) + M: at-least n>> swap [ repetition ] [ ] bi 2array ; + +: to-times ( term n -- ast ) + dup zero? + [ 2drop epsilon ] + [ dupd 1- to-times 2array ] + if ; + M: from-to - [ n>> ] [ m>> ] bi [a,b] swap '[ _ repetition ] map ; + [ n>> swap repetition ] + [ [ m>> ] [ n>> ] bi - to-times ] 2bi + 2array ; : char-class ( ranges ? -- term ) [ ] dip [ ] when ; diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor index e3a1774585..28b0ed1563 100644 --- a/basis/regexp/classes/classes.factor +++ b/basis/regexp/classes/classes.factor @@ -2,20 +2,33 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors kernel math math.order words combinators locals ascii unicode.categories combinators.short-circuit sequences -fry macros arrays assocs sets classes mirrors ; +fry macros arrays assocs sets classes mirrors unicode.script +unicode.data ; IN: regexp.classes -SINGLETONS: any-char any-char-no-nl -letter-class LETTER-class Letter-class digit-class +SINGLETONS: dot letter-class LETTER-class Letter-class digit-class alpha-class non-newline-blank-class ascii-class punctuation-class java-printable-class blank-class control-character-class hex-digit-class java-blank-class c-identifier-class unmatchable-class terminator-class word-boundary-class ; -SINGLETONS: beginning-of-input ^ end-of-input $ end-of-file ^unix $unix word-break ; +SINGLETONS: beginning-of-input ^ end-of-input $ end-of-file +^unix $unix word-break ; -TUPLE: range from to ; -C: range +TUPLE: range-class from to ; +C: range-class + +TUPLE: primitive-class class ; +C: primitive-class + +TUPLE: category-class category ; +C: category-class + +TUPLE: category-range-class category ; +C: category-range-class + +TUPLE: script-class script ; +C: script-class GENERIC: class-member? ( obj class -- ? ) @@ -23,15 +36,9 @@ M: t class-member? ( obj class -- ? ) 2drop t ; M: integer class-member? ( obj class -- ? ) = ; -M: range class-member? ( obj class -- ? ) +M: range-class class-member? ( obj class -- ? ) [ from>> ] [ to>> ] bi between? ; -M: any-char class-member? ( obj class -- ? ) - 2drop t ; - -M: any-char-no-nl class-member? ( obj class -- ? ) - drop CHAR: \n = not ; - M: letter-class class-member? ( obj class -- ? ) drop letter? ; @@ -99,21 +106,24 @@ M: unmatchable-class class-member? ( obj class -- ? ) M: terminator-class class-member? ( obj class -- ? ) drop "\r\n\u000085\u002029\u002028" member? ; -M: ^ class-member? ( obj class -- ? ) - 2drop f ; - -M: $ class-member? ( obj class -- ? ) - 2drop f ; - M: f class-member? 2drop f ; -TUPLE: primitive-class class ; -C: primitive-class +M: script-class class-member? + [ script-of ] [ script>> ] bi* = ; + +M: category-class class-member? + [ category# ] [ category>> ] bi* = ; + +M: category-range-class class-member? + [ category first ] [ category>> ] bi* = ; TUPLE: not-class class ; PREDICATE: not-integer < not-class class>> integer? ; -PREDICATE: not-primitive < not-class class>> primitive-class? ; + +UNION: simple-class + primitive-class range-class category-class category-range-class dot ; +PREDICATE: not-simple < not-class class>> simple-class? ; M: not-class class-member? class>> class-member? not ; @@ -140,14 +150,14 @@ DEFER: substitute [ drop class new seq { } like >>seq ] } case ; inline -TUPLE: class-partition integers not-integers primitives not-primitives and or other ; +TUPLE: class-partition integers not-integers simples not-simples and or other ; : partition-classes ( seq -- class-partition ) prune [ integer? ] partition [ not-integer? ] partition - [ primitive-class? ] partition ! extend primitive-class to epsilon tags - [ not-primitive? ] partition + [ simple-class? ] partition + [ not-simple? ] partition [ and-class? ] partition [ or-class? ] partition class-partition boa ; @@ -161,17 +171,17 @@ TUPLE: class-partition integers not-integers primitives not-primitives and or ot : filter-not-integers ( partition -- partition' ) dup - [ primitives>> ] [ not-primitives>> ] [ or>> ] tri + [ simples>> ] [ not-simples>> ] [ or>> ] tri 3append and-class boa '[ [ class>> _ class-member? ] filter ] change-not-integers ; : answer-ors ( partition -- partition' ) - dup [ not-integers>> ] [ not-primitives>> ] [ primitives>> ] tri 3append + dup [ not-integers>> ] [ not-simples>> ] [ simples>> ] tri 3append '[ [ _ [ t substitute ] each ] map ] change-or ; : contradiction? ( partition -- ? ) { - [ [ primitives>> ] [ not-primitives>> ] bi intersects? ] + [ [ simples>> ] [ not-simples>> ] bi intersects? ] [ other>> f swap member? ] } 1|| ; @@ -192,17 +202,17 @@ TUPLE: class-partition integers not-integers primitives not-primitives and or ot : filter-integers ( partition -- partition' ) dup - [ primitives>> ] [ not-primitives>> ] [ and>> ] tri + [ simples>> ] [ not-simples>> ] [ and>> ] tri 3append or-class boa '[ [ _ class-member? not ] filter ] change-integers ; : answer-ands ( partition -- partition' ) - dup [ integers>> ] [ not-primitives>> ] [ primitives>> ] tri 3append + dup [ integers>> ] [ not-simples>> ] [ simples>> ] tri 3append '[ [ _ [ f substitute ] each ] map ] change-and ; : tautology? ( partition -- ? ) { - [ [ primitives>> ] [ not-primitives>> ] bi intersects? ] + [ [ simples>> ] [ not-simples>> ] bi intersects? ] [ other>> t swap member? ] } 1|| ; @@ -241,8 +251,6 @@ M: f drop t ; M: primitive-class class-member? class>> class-member? ; -UNION: class primitive-class not-class or-class and-class range ; - TUPLE: condition question yes no ; C: condition diff --git a/basis/regexp/combinators/combinators.factor b/basis/regexp/combinators/combinators.factor index 2941afd99e..3bb5fcef6d 100644 --- a/basis/regexp/combinators/combinators.factor +++ b/basis/regexp/combinators/combinators.factor @@ -13,14 +13,14 @@ IN: regexp.combinators PRIVATE> -CONSTANT: R/ (?~.*)/ +CONSTANT: R/ (?~.*)/s : ( string -- regexp ) [ "\\Q" "\\E" surround ] [ ] bi make-regexp ; foldable : ( char1 char2 -- regexp ) [ [ "[" "-" surround ] [ "]" append ] bi* append ] - [ ] + [ ] 2bi make-regexp ; : ( regexps -- disjunction ) diff --git a/basis/regexp/nfa/nfa.factor b/basis/regexp/nfa/nfa.factor index d59d4818ec..a692f70778 100644 --- a/basis/regexp/nfa/nfa.factor +++ b/basis/regexp/nfa/nfa.factor @@ -3,11 +3,11 @@ USING: accessors arrays assocs grouping kernel locals math namespaces sequences fry quotations math.order math.ranges vectors unicode.categories regexp.transition-tables words sets hashtables -combinators.short-circuit unicode.case unicode.case.private regexp.ast -regexp.classes ; +combinators.short-circuit unicode.data regexp.ast +regexp.classes memoize ; IN: regexp.nfa -! This uses unicode.case.private for ch>upper and ch>lower +! This uses unicode.data for ch>upper and ch>lower ! but case-insensitive matching should be done by case-folding everything ! before processing starts @@ -117,8 +117,17 @@ M: or-class modify-class M: not-class modify-class class>> modify-class ; -M: any-char modify-class - drop dotall option? t any-char-no-nl ? ; +MEMO: unix-dot ( -- class ) + CHAR: \n ; + +MEMO: nonl-dot ( -- class ) + { CHAR: \n CHAR: \r } ; + +M: dot modify-class + drop dotall option? [ t ] [ + unix-lines option? + unix-dot nonl-dot ? + ] if ; : modify-letter-class ( class -- newclass ) case-insensitive option? [ drop Letter-class ] when ; @@ -131,17 +140,17 @@ M: LETTER-class modify-class modify-letter-class ; [ [ LETTER? ] bi@ and ] } 2|| ; -M: range modify-class +M: range-class modify-class case-insensitive option? [ dup cased-range? [ [ from>> ] [ to>> ] bi - [ [ ch>lower ] bi@ ] - [ [ ch>upper ] bi@ ] 2bi + [ [ ch>lower ] bi@ ] + [ [ ch>upper ] bi@ ] 2bi 2array ] when ] when ; -M: class nfa-node +M: object nfa-node modify-class add-simple-entry ; M: with-options nfa-node ( node -- start end ) diff --git a/basis/regexp/parser/parser.factor b/basis/regexp/parser/parser.factor index 7b2d6af2c1..bf5465e0e2 100644 --- a/basis/regexp/parser/parser.factor +++ b/basis/regexp/parser/parser.factor @@ -18,6 +18,13 @@ ERROR: bad-number ; ERROR: bad-class name ; +: parse-unicode-class ( name -- class ) + ! Implement this! + drop f ; + +: unicode-class ( name -- class ) + dup parse-unicode-class [ ] [ bad-class ] ?if ; + : name>class ( name -- class ) >string >case-fold { { "lower" letter-class } @@ -32,8 +39,7 @@ ERROR: bad-class name ; { "cntrl" control-character-class } { "xdigit" hex-digit-class } { "space" java-blank-class } - ! TODO: unicode-character-class - } [ bad-class ] at-error ; + } [ unicode-class ] at-error ; : lookup-escape ( char -- ast ) { @@ -119,10 +125,10 @@ AnyRangeCharacter = EscapeSequence | . RangeCharacter = !("]") AnyRangeCharacter -Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b ]] +Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b ]] | RangeCharacter -StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b ]] +StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b ]] | AnyRangeCharacter Ranges = StartRange:s Range*:r => [[ r s prefix ]] @@ -144,7 +150,7 @@ Parenthized = "?:" Alternation:a => [[ a ]] Element = "(" Parenthized:p ")" => [[ p ]] | "[" CharClass:r "]" => [[ r ]] - | ".":d => [[ any-char ]] + | ".":d => [[ dot ]] | Character Number = (!(","|"}").)* => [[ string>number ensure-number ]] diff --git a/basis/regexp/prettyprint/authors.txt b/basis/regexp/prettyprint/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/basis/regexp/prettyprint/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/basis/regexp/prettyprint/prettyprint.factor b/basis/regexp/prettyprint/prettyprint.factor new file mode 100644 index 0000000000..7af762a34e --- /dev/null +++ b/basis/regexp/prettyprint/prettyprint.factor @@ -0,0 +1,13 @@ +! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors kernel make prettyprint.backend +prettyprint.custom regexp regexp.parser regexp.private ; +IN: regexp.prettyprint + +M: regexp pprint* + [ + [ + [ raw>> dup find-regexp-syntax swap % swap % % ] + [ options>> options>string % ] bi + ] "" make + ] keep present-text ; \ No newline at end of file diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index 5889b19e47..33499b1437 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -1,10 +1,9 @@ ! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: accessors combinators kernel kernel.private math sequences -sequences.private strings sets assocs prettyprint.backend -prettyprint.custom make lexer namespaces parser arrays fry locals -regexp.parser splitting sorting regexp.ast regexp.negation -regexp.compiler compiler.units words math.ranges ; +sequences.private strings sets assocs make lexer namespaces parser +arrays fry locals regexp.parser splitting sorting regexp.ast +regexp.negation regexp.compiler compiler.units words math.ranges ; IN: regexp TUPLE: regexp @@ -217,11 +216,8 @@ PRIVATE> : R{ CHAR: } parsing-regexp ; parsing : R| CHAR: | parsing-regexp ; parsing -M: regexp pprint* - [ - [ - [ raw>> dup find-regexp-syntax swap % swap % % ] - [ options>> options>string % ] bi - ] "" make - ] keep present-text ; +USING: vocabs vocabs.loader ; +"prettyprint" vocab [ + "regexp.prettyprint" require +] when \ No newline at end of file diff --git a/basis/simple-flat-file/simple-flat-file.factor b/basis/simple-flat-file/simple-flat-file.factor index 403fc4d14b..6e53c97738 100644 --- a/basis/simple-flat-file/simple-flat-file.factor +++ b/basis/simple-flat-file/simple-flat-file.factor @@ -1,10 +1,11 @@ ! Copyright (C) 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. -USING: sequences splitting kernel math.parser io.files io.encodings.ascii biassocs ; +USING: sequences splitting kernel math.parser io.files io.encodings.utf8 +biassocs ascii ; IN: simple-flat-file : drop-comments ( seq -- newseq ) - [ "#" split1 drop ] map harvest ; + [ "#@" split first ] map harvest ; : split-column ( line -- columns ) " \t" split harvest 2 short head 2 f pad-tail ; @@ -22,5 +23,10 @@ IN: simple-flat-file drop-comments [ parse-line ] map ; : flat-file>biassoc ( filename -- biassoc ) - ascii file-lines process-codetable-lines >biassoc ; + utf8 file-lines process-codetable-lines >biassoc ; +: split-; ( line -- array ) + ";" split [ [ blank? ] trim ] map ; + +: data ( filename -- data ) + utf8 file-lines drop-comments [ split-; ] map ; diff --git a/basis/stack-checker/errors/errors.factor b/basis/stack-checker/errors/errors.factor index 7f35ece714..07c26ad100 100644 --- a/basis/stack-checker/errors/errors.factor +++ b/basis/stack-checker/errors/errors.factor @@ -1,8 +1,8 @@ -! Copyright (C) 2006, 2008 Slava Pestov. +! Copyright (C) 2006, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: kernel generic sequences io words arrays summary effects -assocs accessors namespaces compiler.errors stack-checker.values -stack-checker.recursive-state ; +continuations assocs accessors namespaces compiler.errors +stack-checker.values stack-checker.recursive-state ; IN: stack-checker.errors : pretty-word ( word -- word' ) @@ -15,7 +15,7 @@ M: inference-error compiler-error-type type>> ; : (inference-error) ( ... class type -- * ) [ boa ] dip recursive-state get word>> - \ inference-error boa throw ; inline + \ inference-error boa rethrow ; inline : inference-error ( ... class -- * ) +error+ (inference-error) ; inline diff --git a/basis/tools/deploy/deploy-tests.factor b/basis/tools/deploy/deploy-tests.factor index 3a2f960fc9..3bebf7236d 100644 --- a/basis/tools/deploy/deploy-tests.factor +++ b/basis/tools/deploy/deploy-tests.factor @@ -26,6 +26,8 @@ os macosx? [ [ t ] [ "webkit-demo" shake-and-bake 500000 small-enough? ] unit-test ] when +[ t ] [ "benchmark.regex-dna" shake-and-bake 900000 small-enough? ] unit-test + { "tools.deploy.test.1" "tools.deploy.test.2" diff --git a/basis/tools/deploy/shaker/shaker.factor b/basis/tools/deploy/shaker/shaker.factor index 239d34b864..a729e40e2a 100755 --- a/basis/tools/deploy/shaker/shaker.factor +++ b/basis/tools/deploy/shaker/shaker.factor @@ -54,11 +54,8 @@ IN: tools.deploy.shaker ] when ; : strip-call ( -- ) - "call" vocab [ - "Stripping stack effect checking from call( and execute(" show - "vocab:tools/deploy/shaker/strip-call.factor" - run-file - ] when ; + "Stripping stack effect checking from call( and execute(" show + "vocab:tools/deploy/shaker/strip-call.factor" run-file ; : strip-cocoa ( -- ) "cocoa" vocab [ diff --git a/basis/tools/deploy/shaker/strip-call.factor b/basis/tools/deploy/shaker/strip-call.factor index 860a0f3849..d0593b6c15 100644 --- a/basis/tools/deploy/shaker/strip-call.factor +++ b/basis/tools/deploy/shaker/strip-call.factor @@ -2,8 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. IN: tools.deploy.shaker.call -IN: call -USE: call.private +IN: combinators +USE: combinators.private : call-effect ( word effect -- ) call-effect-unsafe ; inline diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor index f2e9454545..91f6a45911 100644 --- a/basis/unicode/breaks/breaks.factor +++ b/basis/unicode/breaks/breaks.factor @@ -2,8 +2,9 @@ ! See http://factorcode.org/license.txt for BSD license. USING: combinators.short-circuit unicode.categories kernel math combinators splitting sequences math.parser io.files io assocs -arrays namespaces make math.ranges unicode.normalize.private values -io.encodings.ascii unicode.syntax unicode.data compiler.units fry +arrays namespaces make math.ranges unicode.normalize +unicode.normalize.private values io.encodings.ascii +unicode.syntax unicode.data compiler.units fry alien.syntax sets accessors interval-maps memoize locals words ; IN: unicode.breaks @@ -126,7 +127,7 @@ to: grapheme-table VALUE: word-break-table -"vocab:unicode/data/WordBreakProperty.txt" load-script +"vocab:unicode/data/WordBreakProperty.txt" load-key-value to: word-break-table C-ENUM: wOther wCR wLF wNewline wExtend wFormat wKatakana wALetter wMidLetter diff --git a/basis/unicode/case/case-tests.factor b/basis/unicode/case/case-tests.factor index 52a8d9755e..a76f5e78c4 100644 --- a/basis/unicode/case/case-tests.factor +++ b/basis/unicode/case/case-tests.factor @@ -1,6 +1,7 @@ ! Copyright (C) 2008, 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: unicode.case unicode.case.private tools.test namespaces strings unicode.normalize ; +USING: unicode.case tools.test namespaces strings unicode.normalize +unicode.case.private ; IN: unicode.case.tests \ >upper must-infer diff --git a/basis/unicode/case/case.factor b/basis/unicode/case/case.factor index c75582dacd..fa842b8b81 100644 --- a/basis/unicode/case/case.factor +++ b/basis/unicode/case/case.factor @@ -7,12 +7,6 @@ strings splitting kernel accessors unicode.breaks fry locals ; QUALIFIED: ascii IN: unicode.case -lower ( ch -- lower ) simple-lower at-default ; inline -: ch>upper ( ch -- upper ) simple-upper at-default ; inline -: ch>title ( ch -- title ) simple-title at-default ; inline -PRIVATE> - SYMBOL: locale ! Just casing locale, or overall? :> out string [ - dup special-casing at + dup special-case [ string-quot call out push-all ] [ char-quot call out push ] ?if ] each out "" like ; inline diff --git a/basis/unicode/categories/categories-tests.factor b/basis/unicode/categories/categories-tests.factor index e16125b642..1e718cf9b7 100644 --- a/basis/unicode/categories/categories-tests.factor +++ b/basis/unicode/categories/categories-tests.factor @@ -1,4 +1,7 @@ -USING: tools.test kernel unicode.categories words sequences unicode.syntax ; +! Copyright (C) 2008 Daniel Ehrenberg. +! See http://factorcode.org/license.txt for BSD license. +USING: tools.test kernel unicode.categories words sequences unicode.data ; +IN: unicode.categories.tests [ { f f t t f t t f f t } ] [ CHAR: A { blank? letter? LETTER? Letter? digit? diff --git a/basis/unicode/collation/collation.factor b/basis/unicode/collation/collation.factor index 2a94d501bd..0c51ea4352 100755 --- a/basis/unicode/collation/collation.factor +++ b/basis/unicode/collation/collation.factor @@ -5,7 +5,7 @@ io.encodings.ascii kernel values splitting accessors math.parser ascii io assocs strings math namespaces make sorting combinators math.order arrays unicode.normalize unicode.data locals unicode.syntax macros sequences.deep words unicode.breaks -quotations combinators.short-circuit ; +quotations combinators.short-circuit simple-flat-file ; IN: unicode.collation >primary ] [ >>secondary ] [ >>tertiary ] tri* ] map ; -: parse-line ( line -- code-poing weight ) - ";" split1 [ [ blank? ] trim ] bi@ - [ " " split [ hex> ] "" map-as ] [ parse-weight ] bi* ; +: parse-keys ( string -- chars ) + " " split [ hex> ] "" map-as ; : parse-ducet ( file -- ducet ) - ascii file-lines filter-comments - [ parse-line ] H{ } map>assoc ; + data [ [ parse-keys ] [ parse-weight ] bi* ] H{ } assoc-map-as ; "vocab:unicode/collation/allkeys.txt" parse-ducet to: ducet diff --git a/basis/unicode/data/data-docs.factor b/basis/unicode/data/data-docs.factor index 55fed31386..d1a458eb48 100644 --- a/basis/unicode/data/data-docs.factor +++ b/basis/unicode/data/data-docs.factor @@ -1,3 +1,5 @@ +! Copyright (C) 2009 Daniel Ehrenberg +! See http://factorcode.org/license.txt for BSD license. USING: help.syntax help.markup strings ; IN: unicode.data @@ -5,18 +7,14 @@ ABOUT: "unicode.data" ARTICLE: "unicode.data" "Unicode data tables" "The " { $vocab-link "unicode.data" "unicode.data" } " vocabulary contains core Unicode data tables and code for parsing this from files." -{ $subsection load-script } { $subsection canonical-entry } { $subsection combine-chars } { $subsection combining-class } { $subsection non-starter? } { $subsection name>char } { $subsection char>name } -{ $subsection property? } ; - -HELP: load-script -{ $values { "filename" string } { "table" "an interval map" } } -{ $description "This loads a file that looks like Script.txt in the Unicode Character Database and converts it into an efficient interval map, where the keys are characters and the values are strings for the properties." } ; +{ $subsection property? } +{ $subsection load-key-value } ; HELP: canonical-entry { $values { "char" "a code point" } { "seq" string } } @@ -49,3 +47,7 @@ HELP: name>char HELP: property? { $values { "char" "a code point" } { "property" string } { "?" "a boolean" } } { $description "Tests whether the code point is listed under the given property in PropList.txt in the Unicode Character Database." } ; + +HELP: load-key-value +{ $values { "filename" string } { "table" "an interval map" } } +{ $description "This loads a file that looks like Script.txt in the Unicode Character Database and converts it into an efficient interval map, where the keys are characters and the values are strings for the properties." } ; diff --git a/basis/unicode/data/data.factor b/basis/unicode/data/data.factor index 74914e8537..e94036a85e 100644 --- a/basis/unicode/data/data.factor +++ b/basis/unicode/data/data.factor @@ -1,13 +1,15 @@ -! Copyright (C) 2008 Daniel Ehrenberg. +! Copyright (C) 2008, 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: combinators.short-circuit assocs math kernel sequences io.files hashtables quotations splitting grouping arrays io math.parser hash2 math.order byte-arrays words namespaces words compiler.units parser io.encodings.ascii values interval-maps ascii sets combinators locals math.ranges sorting make -strings.parser io.encodings.utf8 memoize ; +strings.parser io.encodings.utf8 memoize simple-flat-file ; IN: unicode.data +char ( name -- char ) name-map at ; -: char>name ( char -- name ) name-map value-at ; -: property? ( char property -- ? ) properties at interval-key? ; +PRIVATE> + +VALUE: name-map + +: canonical-entry ( char -- seq ) canonical-map at ; inline +: combine-chars ( a b -- char/f ) combine-map hash2 ; inline +: compatibility-entry ( char -- seq ) compatibility-map at ; inline +: combining-class ( char -- n ) class-map at ; inline +: non-starter? ( char -- ? ) combining-class { 0 f } member? not ; inline +: name>char ( name -- char ) name-map at ; inline +: char>name ( char -- name ) name-map value-at ; inline +: property? ( char property -- ? ) properties at interval-key? ; inline +: ch>lower ( ch -- lower ) simple-lower at-default ; inline +: ch>upper ( ch -- upper ) simple-upper at-default ; inline +: ch>title ( ch -- title ) simple-title at-default ; inline +: special-case ( ch -- casing-tuple ) special-casing at ; inline + +! For non-existent characters, use Cn +CONSTANT: categories + { "Cn" + "Lu" "Ll" "Lt" "Lm" "Lo" + "Mn" "Mc" "Me" + "Nd" "Nl" "No" + "Pc" "Pd" "Ps" "Pe" "Pi" "Pf" "Po" + "Sm" "Sc" "Sk" "So" + "Zs" "Zl" "Zp" + "Cc" "Cf" "Cs" "Co" } + + [ swap ] H{ } assoc-map-as ; + +CONSTANT: num-chars HEX: 2FA1E + +PRIVATE> + +: category# ( char -- category ) + ! There are a few characters that should be Cn + ! that this gives Cf or Mn + ! Cf = 26; Mn = 5; Cn = 29 + ! Use a compressed array instead? + dup category-map ?nth [ ] [ + dup HEX: E0001 HEX: E007F between? + [ drop 26 ] [ + HEX: E0100 HEX: E01EF between? 5 29 ? + ] if + ] ?if ; + +: category ( char -- category ) + category# categories nth ; + +assoc [ [ hex> ] dip ] assoc-map ; @@ -97,22 +133,6 @@ VALUE: properties [ nip zero? not ] assoc-filter >hashtable ; -! For non-existent characters, use Cn -CONSTANT: categories - { "Cn" - "Lu" "Ll" "Lt" "Lm" "Lo" - "Mn" "Mc" "Me" - "Nd" "Nl" "No" - "Pc" "Pd" "Ps" "Pe" "Pi" "Pf" "Po" - "Sm" "Sc" "Sk" "So" - "Zs" "Zl" "Zp" - "Cc" "Cf" "Cs" "Co" } - -MEMO: categories-map ( -- hashtable ) - categories [ swap ] H{ } assoc-map-as ; - -CONSTANT: num-chars HEX: 2FA1E - ! the maximum unicode char in the first 3 planes : ?set-nth ( val index seq -- ) @@ -140,24 +160,26 @@ CONSTANT: num-chars HEX: 2FA1E : multihex ( hexstring -- string ) " " split [ hex> ] map sift ; +PRIVATE> + TUPLE: code-point lower title upper ; C: code-point + swap first set ; ! Extra properties -: properties-lines ( -- lines ) - "vocab:unicode/data/PropList.txt" - ascii file-lines ; - : parse-properties ( -- {{[a,b],prop}} ) - properties-lines filter-comments [ - split-; first2 - [ ".." split1 [ dup ] unless* [ hex> ] bi@ 2array ] dip - ] { } map>assoc ; + "vocab:unicode/data/PropList.txt" data [ + [ + ".." split1 [ dup ] unless* + [ hex> ] bi@ 2array + ] dip + ] assoc-map ; : properties>intervals ( properties -- assoc[str,interval] ) dup values prune [ f ] H{ } map>assoc @@ -195,14 +217,11 @@ load-special-casing to: special-casing load-properties to: properties -! Utility to load resource files that look like Scripts.txt +[ name>char [ "Invalid character" throw ] unless* ] +name>char-hook set-global SYMBOL: interned -: parse-script ( filename -- assoc ) - ! assoc is code point/range => name - ascii file-lines filter-comments [ split-; ] map ; - : range, ( value key -- ) swap interned get [ = ] with find nip 2array , ; @@ -216,12 +235,11 @@ SYMBOL: interned ] assoc-each ] { } make ; -: process-script ( ranges -- table ) +: process-key-value ( ranges -- table ) dup values prune interned [ expand-ranges ] with-variable ; -: load-script ( filename -- table ) - parse-script process-script ; +PRIVATE> -[ name>char [ "Invalid character" throw ] unless* ] -name>char-hook set-global +: load-key-value ( filename -- table ) + data process-key-value ; diff --git a/basis/unicode/normalize/normalize-tests.factor b/basis/unicode/normalize/normalize-tests.factor index f3ecb96af9..f774016272 100644 --- a/basis/unicode/normalize/normalize-tests.factor +++ b/basis/unicode/normalize/normalize-tests.factor @@ -1,5 +1,5 @@ USING: unicode.normalize kernel tools.test sequences -unicode.data io.encodings.utf8 io.files splitting math.parser +simple-flat-file io.encodings.utf8 io.files splitting math.parser locals math quotations assocs combinators unicode.normalize.private ; IN: unicode.normalize.tests @@ -23,9 +23,8 @@ IN: unicode.normalize.tests [ "\u00d55c" ] [ "\u001112\u001161\u0011ab" nfc ] unit-test : parse-test ( -- tests ) - "vocab:unicode/normalize/NormalizationTest.txt" - utf8 file-lines filter-comments - [ ";" split 5 head [ " " split [ hex> ] "" map-as ] map ] map ; + "vocab:unicode/normalize/NormalizationTest.txt" data + [ 5 head [ " " split [ hex> ] "" map-as ] map ] map ; :: assert= ( test spec quot -- ) spec [ diff --git a/basis/unicode/script/script-docs.factor b/basis/unicode/script/script-docs.factor index 6612825c21..2860f83bef 100644 --- a/basis/unicode/script/script-docs.factor +++ b/basis/unicode/script/script-docs.factor @@ -1,6 +1,14 @@ -USING: help.syntax help.markup ; +! Copyright (C) 2009 Daniel Ehrenberg +! See http://factorcode.org/license.txt for BSD license. +USING: help.syntax help.markup strings ; IN: unicode.script +ABOUT: "unicode.script" + +ARTICLE: "unicode.script" "Unicode script properties" +"The unicode standard gives every character a script. Note that this is different from a language, and that it is non-trivial to detect language from a string. To get the script of a character, use" +{ $subsection script-of } ; + HELP: script-of -{ $values { "char" "a code point" } { "script" "a symbol" } } -{ $description "Gets a symbol representing the code point of a given character. The word name of the symbol is the same as the one " } ; +{ $values { "char" "a code point" } { "script" string } } +{ $description "Finds the script of the given Unicode code point, represented as a string." } ; diff --git a/basis/unicode/script/script.factor b/basis/unicode/script/script.factor index 383f9e3de3..ed80476084 100644 --- a/basis/unicode/script/script.factor +++ b/basis/unicode/script/script.factor @@ -7,10 +7,14 @@ words words.symbol compiler.units arrays interval-maps unicode.data ; IN: unicode.script + + : script-of ( char -- script ) script-table interval-at ; diff --git a/basis/unicode/syntax/syntax.factor b/basis/unicode/syntax/syntax.factor index b7ac022d0e..5bd8c05e15 100644 --- a/basis/unicode/syntax/syntax.factor +++ b/basis/unicode/syntax/syntax.factor @@ -5,22 +5,7 @@ bit-arrays namespaces make sequences.private arrays quotations assocs classes.predicate math.order strings.parser ; IN: unicode.syntax -! Character classes (categories) - -: category# ( char -- category ) - ! There are a few characters that should be Cn - ! that this gives Cf or Mn - ! Cf = 26; Mn = 5; Cn = 29 - ! Use a compressed array instead? - dup category-map ?nth [ ] [ - dup HEX: E0001 HEX: E007F between? - [ drop 26 ] [ - HEX: E0100 HEX: E01EF between? 5 29 ? - ] if - ] ?if ; - -: category ( char -- category ) - category# categories nth ; +category-array ( categories -- bitarray ) categories [ swap member? ] with map >bit-array ; @@ -40,6 +25,8 @@ IN: unicode.syntax : define-category ( word categories -- ) [category] integer swap define-predicate-class ; +PRIVATE> + : CATEGORY: CREATE ";" parse-tokens define-category ; parsing diff --git a/basis/urls/urls-docs.factor b/basis/urls/urls-docs.factor index 437a9419e3..707caf3188 100644 --- a/basis/urls/urls-docs.factor +++ b/basis/urls/urls-docs.factor @@ -82,9 +82,9 @@ HELP: parse-host { $notes "This word is used by " { $link >url } ". It can also be used directly to parse " { $snippet "host:port" } " strings which are not full URLs." } { $examples { $example - "USING: prettyprint urls kernel ;" - "\"sbcl.org:80\" parse-host .s 2drop" - "\"sbcl.org\"\n80" + "USING: arrays kernel prettyprint urls ;" + "\"sbcl.org:80\" parse-host 2array ." + "{ \"sbcl.org\" 80 }" } } ; diff --git a/basis/xml/autoencoding/autoencoding.factor b/basis/xml/autoencoding/autoencoding.factor index fe4762acbe..63482ff706 100644 --- a/basis/xml/autoencoding/autoencoding.factor +++ b/basis/xml/autoencoding/autoencoding.factor @@ -3,7 +3,7 @@ USING: kernel namespaces xml.name io.encodings.utf8 xml.elements io.encodings.utf16 xml.tokenize xml.state math ascii sequences io.encodings.string io.encodings combinators accessors -xml.data io.encodings.iana ; +xml.data io.encodings.iana xml.errors ; IN: xml.autoencoding : decode-stream ( encoding -- ) @@ -35,7 +35,10 @@ IN: xml.autoencoding : prolog-encoding ( prolog -- ) encoding>> dup "UTF-16" = - [ drop ] [ name>encoding [ decode-stream ] when* ] if ; + [ drop ] [ + dup name>encoding + [ decode-stream ] [ bad-encoding ] ?if + ] if ; : instruct-encoding ( instruct/prolog -- ) dup prolog? diff --git a/basis/xml/errors/errors-tests.factor b/basis/xml/errors/errors-tests.factor index 8a469bc08f..10d7bb63ca 100644 --- a/basis/xml/errors/errors-tests.factor +++ b/basis/xml/errors/errors-tests.factor @@ -1,5 +1,5 @@ USING: continuations xml xml.errors tools.test kernel arrays -xml.data quotations fry ; +xml.data quotations fry byte-arrays ; IN: xml.errors.tests : xml-error-test ( expected-error xml-string -- ) @@ -40,3 +40,4 @@ T{ bad-doctype f 1 22 T{ opener { name T{ name f "" "foo" "" } } { attrs T{ attr T{ disallowed-char f 1 4 1 } "\u000001" xml-error-test T{ missing-close f 1 8 } "