diff --git a/basis/http/http.factor b/basis/http/http.factor index cda3460c71..2b5414b299 100755 --- a/basis/http/http.factor +++ b/basis/http/http.factor @@ -3,17 +3,11 @@ USING: accessors kernel combinators math namespaces make assocs sequences splitting sorting sets strings vectors hashtables quotations arrays byte-arrays math.parser calendar -calendar.format present urls - +calendar.format present urls fry io io.encodings io.encodings.iana io.encodings.binary io.encodings.8-bit io.crlf - unicode.case unicode.categories - http.parsers ; - -EXCLUDE: fry => , ; - IN: http : (read-header) ( -- alist ) @@ -217,5 +211,7 @@ TUPLE: post-data data params content-type content-encoding ; " " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ; : parse-content-type ( content-type -- type encoding ) - ";" split1 parse-content-type-attributes "charset" swap at - name>encoding over "text/" head? latin1 binary ? or ; + ";" split1 + parse-content-type-attributes "charset" swap at + [ name>encoding ] + [ dup "text/" head? latin1 binary ? ] if* ; diff --git a/basis/http/server/server-tests.factor b/basis/http/server/server-tests.factor index fdba9a63ef..171973fcd8 100644 --- a/basis/http/server/server-tests.factor +++ b/basis/http/server/server-tests.factor @@ -1,6 +1,21 @@ -USING: http http.server math sequences continuations tools.test ; +USING: http http.server math sequences continuations tools.test +io.encodings.utf8 io.encodings.binary accessors ; IN: http.server.tests [ t ] [ [ \ + first ] [ <500> ] recover response? ] unit-test \ make-http-error must-infer + +[ "text/plain; charset=UTF-8" ] [ + + "text/plain" >>content-type + utf8 >>content-charset + unparse-content-type +] unit-test + +[ "text/xml" ] [ + + "text/xml" >>content-type + binary >>content-charset + unparse-content-type +] unit-test \ No newline at end of file diff --git a/basis/http/server/server.factor b/basis/http/server/server.factor index 97c14a6457..b6ee70057b 100755 --- a/basis/http/server/server.factor +++ b/basis/http/server/server.factor @@ -97,10 +97,8 @@ GENERIC: write-full-response ( request response -- ) tri ; : unparse-content-type ( request -- content-type ) - [ content-type>> "application/octet-stream" or ] - [ content-charset>> encoding>name ] - bi - [ "; charset=" glue ] when* ; + [ content-type>> "application/octet-stream" or ] [ content-charset>> ] bi + dup binary eq? [ drop ] [ encoding>name "; charset=" glue ] if ; : ensure-domain ( cookie -- cookie ) [ diff --git a/basis/io/encodings/8-bit/8-bit.factor b/basis/io/encodings/8-bit/8-bit.factor index f5b102ba31..a11edeb703 100644 --- a/basis/io/encodings/8-bit/8-bit.factor +++ b/basis/io/encodings/8-bit/8-bit.factor @@ -3,31 +3,33 @@ USING: math.parser arrays io.encodings sequences kernel assocs hashtables io.encodings.ascii generic parser classes.tuple words words.symbol io io.files splitting namespaces math -compiler.units accessors ; +compiler.units accessors classes.singleton classes.mixin +io.encodings.iana ; IN: io.encodings.8-bit > encode-8-bit ; M: 8-bit decode-char decode>> decode-8-bit ; -PREDICATE: 8-bit-encoding < word - 8-bit-encodings get-global key? ; +MIXIN: 8-bit-encoding M: 8-bit-encoding 8-bit-encodings get-global at ; @@ -74,15 +75,21 @@ M: 8-bit-encoding M: 8-bit-encoding 8-bit-encodings get-global at ; +: create-encoding ( name -- word ) + "io.encodings.8-bit" create + [ define-singleton-class ] + [ 8-bit-encoding add-mixin-instance ] + [ ] tri ; + PRIVATE> [ mappings [ - [ "io.encodings.8-bit" create ] + first3 + [ create-encoding ] + [ dupd register-encoding ] [ encoding-file parse-file 8-bit boa ] - bi* - ] assoc-map - [ keys [ define-symbol ] each ] - [ 8-bit-encodings set-global ] - bi + tri* + ] H{ } map>assoc + 8-bit-encodings set-global ] with-compilation-unit diff --git a/basis/io/encodings/ascii/ascii.factor b/basis/io/encodings/ascii/ascii.factor index d971cf2e60..deb1a7121f 100644 --- a/basis/io/encodings/ascii/ascii.factor +++ b/basis/io/encodings/ascii/ascii.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: io io.encodings kernel math io.encodings.private ; +USING: io io.encodings kernel math io.encodings.private io.encodings.iana ; IN: io.encodings.ascii byte-array ( string -- byte-array ) @@ -25,8 +35,8 @@ TUPLE: range ufirst ulast bfirst blast ; { [ "uFirst" attr hex> ] [ "uLast" attr hex> ] - [ "bFirst" attr b>byte-array ] - [ "bLast" attr b>byte-array ] + [ "bFirst" attr b>byte-array linear ] + [ "bLast" attr b>byte-array linear ] } cleave range boa ] dip push ; @@ -49,21 +59,13 @@ TUPLE: range ufirst ulast bfirst blast ; ] each-element mapping ranges ] ; -! Algorithms from: -! http://www-128.ibm.com/developerworks/library/u-china.html - -: linear ( bytes -- num ) - ! This hard-codes bMin and bMax - reverse first4 - 10 * + 126 * + 10 * + ; - : unlinear ( num -- bytes ) B{ HEX: 81 HEX: 30 HEX: 81 HEX: 30 } linear - - 10 /mod swap [ HEX: 30 + ] dip - 126 /mod swap [ HEX: 81 + ] dip - 10 /mod swap [ HEX: 30 + ] dip + 10 /mod HEX: 30 + swap + 126 /mod HEX: 81 + swap + 10 /mod HEX: 30 + swap HEX: 81 + - B{ } 4sequence reverse ; + 4byte-array dup reverse-here ; : >interval-map-by ( start-quot end-quot value-quot seq -- interval-map ) '[ _ [ @ 2array ] _ tri ] { } map>assoc ; inline @@ -72,7 +74,7 @@ TUPLE: range ufirst ulast bfirst blast ; [ ufirst>> ] [ ulast>> ] [ ] >interval-map-by ; : ranges-gb>u ( ranges -- interval-map ) - [ bfirst>> linear ] [ blast>> linear ] [ ] >interval-map-by ; + [ bfirst>> ] [ blast>> ] [ ] >interval-map-by ; VALUE: gb>u VALUE: u>gb @@ -85,7 +87,7 @@ ascii xml>gb-data : lookup-range ( char -- byte-array ) dup u>gb interval-at [ - [ ufirst>> - ] [ bfirst>> linear ] bi + unlinear + [ ufirst>> - ] [ bfirst>> ] bi + unlinear ] [ encode-error ] if* ; M: gb18030 encode-char ( char stream encoding -- ) @@ -107,19 +109,19 @@ M: gb18030 encode-char ( char stream encoding -- ) : decode-quad ( byte-array -- char ) dup mapping value-at [ ] [ linear dup gb>u interval-at [ - [ bfirst>> linear - ] [ ufirst>> ] bi + + [ bfirst>> - ] [ ufirst>> ] bi + ] [ drop replacement-char ] if* ] ?if ; : four-byte ( stream byte1 byte2 -- char ) rot 2 swap stream-read dup last-bytes? - [ first2 B{ } 4sequence decode-quad ] + [ first2 4byte-array decode-quad ] [ 3drop replacement-char ] if ; : two-byte ( stream byte -- char ) over stream-read1 { { [ dup not ] [ 3drop replacement-char ] } - { [ dup second-byte? ] [ B{ } 2sequence mapping value-at nip ] } + { [ dup second-byte? ] [ 2byte-array mapping value-at nip ] } { [ dup quad-2/4? ] [ four-byte ] } [ 3drop replacement-char ] } cond ; @@ -127,7 +129,7 @@ M: gb18030 encode-char ( char stream encoding -- ) M: gb18030 decode-char ( stream encoding -- char ) drop dup stream-read1 { { [ dup not ] [ 2drop f ] } - { [ dup ascii? ] [ nip 1array B{ } like mapping value-at ] } + { [ dup ascii? ] [ nip 1byte-array mapping value-at ] } { [ dup quad-1/3? ] [ two-byte ] } [ 2drop replacement-char ] } cond ; diff --git a/basis/io/encodings/iana/iana-docs.factor b/basis/io/encodings/iana/iana-docs.factor index d4a7a65797..c565d79ef5 100644 --- a/basis/io/encodings/iana/iana-docs.factor +++ b/basis/io/encodings/iana/iana-docs.factor @@ -1,12 +1,35 @@ -USING: help.syntax help.markup ; +USING: help.syntax help.markup strings ; IN: io.encodings.iana +ABOUT: "io.encodings.iana" + +ARTICLE: "io.encodings.iana" "IANA-registered encoding names" +"The " { $vocab-link "io.encodings.iana" } " vocabulary provides words for accessing the names of encodings and the encoding descriptors corresponding to names." $nl +"Most text encodings in common use have been registered with IANA. There is a standard set of names for each encoding. Simple conversion functions:" +{ $subsection name>encoding } +{ $subsection encoding>name } +"To let a new encoding be used with the above words, use the following:" +{ $subsection register-encoding } +"Exceptions when encodings or names are not found:" +{ $subsection missing-encoding } +{ $subsection missing-name } ; + +HELP: missing-encoding +{ $error-description "The error called from " { $link name>encoding } " when there is no encoding descriptor registered corresponding to the given name." } ; + +HELP: missing-name +{ $error-description "The error called from " { $link encoding>name } " when there is no name registered corresponding to the given encoding." } ; + HELP: name>encoding { $values { "name" "an encoding name" } { "encoding" "an encoding descriptor" } } -{ "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ; +{ $description "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ; HELP: encoding>name { $values { "encoding" "an encoding descriptor" } { "name" "an encoding name" } } -{ "Given an encoding descriptor, return the preferred IANA name." } ; +{ $description "Given an encoding descriptor, return the preferred IANA name." } ; { name>encoding encoding>name } related-words + +HELP: register-encoding +{ $values { "descriptor" "an encoding descriptor" } { "name" string } } +{ $description "Registers an encoding descriptor with the given name, available for lookup through " { $link name>encoding } " and " { $link encoding>name } ". IANA-registered aliases are automatically included. The name given must be the first name in the " { $snippet "resources:basis/io/encodings/iana/character-sets" } " file." } ; diff --git a/basis/io/encodings/iana/iana-tests.factor b/basis/io/encodings/iana/iana-tests.factor index 8cee07b984..3175e624ce 100644 --- a/basis/io/encodings/iana/iana-tests.factor +++ b/basis/io/encodings/iana/iana-tests.factor @@ -1,5 +1,28 @@ -USING: io.encodings.iana io.encodings.ascii tools.test ; +USING: io.encodings.iana io.encodings.iana.private +io.encodings.utf8 tools.test assocs namespaces ; +IN: io.encodings.iana.tests -[ ascii ] [ "US-ASCII" name>encoding ] unit-test -[ ascii ] [ "ASCII" name>encoding ] unit-test -[ "US-ASCII" ] [ ascii encoding>name ] unit-test +[ utf8 ] [ "UTF-8" name>encoding ] unit-test +[ utf8 ] [ "utf8" name>encoding ] unit-test +[ "UTF-8" ] [ utf8 encoding>name ] unit-test + +! We will never implement EBCDIC-FI-SE-A +SINGLETON: ebcdic-fisea +ebcdic-fisea "EBCDIC-FI-SE-A" register-encoding +[ ebcdic-fisea ] [ "EBCDIC-FI-SE-A" name>encoding ] unit-test +[ ebcdic-fisea ] [ "csEBCDICFISEA" name>encoding ] unit-test +[ "EBCDIC-FI-SE-A" ] [ ebcdic-fisea encoding>name ] unit-test + +! Clean up after myself +[ ] [ + "EBCDIC-FI-SE-A" n>e-table get delete-at + "csEBCDICFISEA" n>e-table get delete-at + ebcdic-fisea e>n-table get delete-at +] unit-test +[ "EBCDIC-FI-SE-A" name>encoding ] must-fail +[ "csEBCDICFISEA" name>encoding ] must-fail +[ ebcdic-fisea encoding>name ] must-fail + +[ ebcdic-fisea "foobar" register-encoding ] must-fail +[ "foobar" name>encoding ] must-fail +[ ebcdic-fisea encoding>name ] must-fail diff --git a/basis/io/encodings/iana/iana.factor b/basis/io/encodings/iana/iana.factor index 5e192025fc..a56bd1194b 100644 --- a/basis/io/encodings/iana/iana.factor +++ b/basis/io/encodings/iana/iana.factor @@ -1,41 +1,24 @@ ! Copyright (C) 2008 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. USING: kernel strings values io.files assocs -splitting sequences io namespaces sets io.encodings.8-bit -io.encodings.ascii io.encodings.utf8 io.encodings.utf16 -io.encodings.chinese io.encodings.japanese ; +splitting sequences io namespaces sets io.encodings.utf8 ; IN: io.encodings.iana e-table - -: e>n-table H{ - { ascii "US-ASCII" } - { utf8 "UTF-8" } - { utf16 "UTF-16" } - { utf16be "UTF-16BE" } - { utf16le "UTF-16LE" } - { latin1 "ISO-8859-1" } - { latin2 "ISO-8859-2" } - { latin3 "ISO-8859-3" } - { latin4 "ISO-8859-4" } - { latin/cyrillic "ISO-8859-5" } - { latin/arabic "ISO-8859-6" } - { latin/greek "ISO-8859-7" } - { latin/hebrew "ISO-8859-8" } - { latin5 "ISO-8859-9" } - { latin6 "ISO-8859-10" } - { shift-jis "Shift_JIS" } - { windows-31j "Windows-31J" } - { gb18030 "GB18030" } -} ; +SYMBOL: n>e-table +SYMBOL: e>n-table +SYMBOL: aliases PRIVATE> +ERROR: missing-encoding name ; + : name>encoding ( name -- encoding ) - n>e-table at ; + dup n>e-table get-global at [ ] [ missing-encoding ] ?if ; + +ERROR: missing-name encoding ; : encoding>name ( encoding -- name ) - e>n-table at ; + dup e>n-table get-global at [ ] [ missing-name ] ?if ; [ " " split ] map [ first { "Name:" "Alias:" } member? ] filter [ second ] map { "None" } diff - ] map ; + ] map harvest ; -: more-aliases ( -- assoc ) +: make-aliases ( stream -- n>e ) + parse-iana [ [ first ] [ ] bi ] H{ } map>assoc ; + +: initial-n>e ( -- assoc ) H{ { "UTF8" utf8 } { "utf8" utf8 } { "utf-8" utf8 } - } ; + { "UTF-8" utf8 } + } clone ; + +: initial-e>n ( -- assoc ) + H{ { utf8 "UTF-8" } } clone ; -: make-n>e ( stream -- n>e ) - parse-iana [ [ - dup [ - e>n-table value-at - [ swap [ set ] with each ] - [ drop ] if* - ] with each - ] each ] H{ } make-assoc more-aliases assoc-union ; PRIVATE> "resource:basis/io/encodings/iana/character-sets" -ascii make-n>e to: n>e-table +utf8 make-aliases aliases set-global + +n>e-table global [ initial-n>e or ] change-at +e>n-table global [ initial-e>n or ] change-at + +: register-encoding ( descriptor name -- ) + [ + aliases get at [ + [ n>e-table get-global set-at ] with each + ] [ "Bad encoding registration" throw ] if* + ] [ swap e>n-table get-global set-at ] 2bi ; diff --git a/basis/io/encodings/japanese/japanese-docs.factor b/basis/io/encodings/japanese/japanese-docs.factor index e34f5736f2..48f94af7b4 100644 --- a/basis/io/encodings/japanese/japanese-docs.factor +++ b/basis/io/encodings/japanese/japanese-docs.factor @@ -4,7 +4,7 @@ USING: help.markup help.syntax ; IN: io.encodings.japanese ARTICLE: "io.encodings.japanese" "Japanese text encodings" -"The " { $vocab-link "io.encodings.japanese" } " vocabulary implements Japanese-specific text encodings. Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete." +"Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete." { $subsection shift-jis } { $subsection windows-31j } ; diff --git a/basis/io/encodings/japanese/japanese.factor b/basis/io/encodings/japanese/japanese.factor index 3a66181db1..194ade377b 100644 --- a/basis/io/encodings/japanese/japanese.factor +++ b/basis/io/encodings/japanese/japanese.factor @@ -1,17 +1,31 @@ ! Copyright (C) 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. USING: sequences kernel io io.files combinators.short-circuit -math.order values assocs io.encodings io.binary fry strings -math io.encodings.ascii arrays accessors splitting math.parser -biassocs ; +math.order values assocs io.encodings io.binary fry strings math +io.encodings.ascii arrays byte-arrays accessors splitting +math.parser biassocs io.encodings.iana ; IN: io.encodings.japanese -VALUE: shift-jis +SINGLETON: shift-jis -VALUE: windows-31j +shift-jis "Shift_JIS" register-encoding + +SINGLETON: windows-31j + +windows-31j "Windows-31J" register-encoding drop shift-jis-table ; +M: shift-jis drop shift-jis-table ; + +VALUE: windows-31j-table + +M: windows-31j drop windows-31j-table ; +M: windows-31j drop windows-31j-table ; + TUPLE: jis assoc ; : ( assoc -- jis ) @@ -31,17 +45,17 @@ TUPLE: jis assoc ; ascii file-lines process-jis ; "resource:basis/io/encodings/japanese/CP932.txt" -make-jis to: windows-31j +make-jis to: windows-31j-table "resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt" -make-jis to: shift-jis +make-jis to: shift-jis-table : small? ( char -- ? ) ! ASCII range or single-byte halfwidth katakana { [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ; : write-halfword ( stream halfword -- ) - h>b/b swap B{ } 2sequence swap stream-write ; + h>b/b swap 2byte-array swap stream-write ; M: jis encode-char swapd ch>jis diff --git a/basis/io/encodings/utf16/utf16.factor b/basis/io/encodings/utf16/utf16.factor index f8a6434d90..d61c07f806 100644 --- a/basis/io/encodings/utf16/utf16.factor +++ b/basis/io/encodings/utf16/utf16.factor @@ -1,15 +1,21 @@ -! Copyright (C) 2006, 2008 Daniel Ehrenberg. +! Copyright (C) 2006, 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: math kernel sequences sbufs vectors namespaces io.binary -io.encodings combinators splitting io byte-arrays ; +io.encodings combinators splitting io byte-arrays io.encodings.iana ; IN: io.encodings.utf16 SINGLETON: utf16be +utf16be "UTF-16BE" register-encoding + SINGLETON: utf16le +utf16le "UTF-16LE" register-encoding + SINGLETON: utf16 +utf16 "UTF-16" register-encoding + ERROR: missing-bom ; } ; + +HELP: wrap-lines +{ $values { "lines" string } { "width" integer } { "newlines" "sequence of strings" } } +{ $description "Given a string, divides it into a sequence of lines where each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ; + +HELP: wrap-string +{ $values { "string" string } { "width" integer } { "newstring" string } } +{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ; + +HELP: wrap-indented-string +{ $values { "string" string } { "width" integer } { "indent" string } { "newstring" string } } +{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space. Before each line, the indent string is added." } ; + +HELP: wrap +{ $values { "words" { "a sequence of " { $instance word } "s" } } { "width" integer } { "lines" "a sequence of sequences of words" } } +{ $description "Divides the words into lines, where the sum of the lengths of the words on a line (not counting breaks at the end of the line) is at most the given width. Every line except for the first one starts with a non-break, and every one but the last ends with a break." } ; + +HELP: word +{ $class-description "A word, for the purposes of " { $vocab-link "wrap" } ", is a Factor object annotated with a length (in the " { $snippet "width" } " slot) and knowledge about whether it is an allowable position for an optional line break (in the " { $snippet "break?" } " slot). Words can be created with " { $link } "." } +{ $see-also wrap } ; + +HELP: +{ $values { "key" object } { "width" integer } { "break?" { { $link t } " or " { $link POSTPONE: f } } } { "word" word } } +{ $description "Creates a " { $link word } " object with the given parameters." } +{ $see-also wrap } ; diff --git a/basis/wrap/wrap-tests.factor b/basis/wrap/wrap-tests.factor index b2d18761e2..ba5168a1c2 100644 --- a/basis/wrap/wrap-tests.factor +++ b/basis/wrap/wrap-tests.factor @@ -1,5 +1,7 @@ -IN: wrap.tests +! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov +! See http://factorcode.org/license.txt for BSD license. USING: tools.test wrap multiline sequences ; +IN: wrap.tests [ { @@ -23,6 +25,32 @@ USING: tools.test wrap multiline sequences ; } 35 wrap [ { } like ] map ] unit-test +[ + { + { + T{ word f 1 10 f } + T{ word f 2 10 f } + T{ word f 3 9 t } + T{ word f 3 9 t } + T{ word f 3 9 t } + } + { + T{ word f 4 10 f } + T{ word f 5 10 f } + } + } +] [ + { + T{ word f 1 10 f } + T{ word f 2 10 f } + T{ word f 3 9 t } + T{ word f 3 9 t } + T{ word f 3 9 t } + T{ word f 4 10 f } + T{ word f 5 10 f } + } 35 wrap [ { } like ] map +] unit-test + [ <" This is a long piece @@ -45,4 +73,10 @@ word wrap."> ] [ <" This is a long piece of text that we wish to word wrap."> 12 " " wrap-indented-string -] unit-test \ No newline at end of file +] unit-test + +[ "this text\nhas lots of\nspaces" ] +[ "this text has lots of spaces" 12 wrap-string ] unit-test + +[ "hello\nhow\nare\nyou\ntoday?" ] +[ "hello how are you today?" 3 wrap-string ] unit-test diff --git a/basis/wrap/wrap.factor b/basis/wrap/wrap.factor index 8e4e2753a8..e93509b58e 100644 --- a/basis/wrap/wrap.factor +++ b/basis/wrap/wrap.factor @@ -1,3 +1,5 @@ +! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov +! See http://factorcode.org/license.txt for BSD license. USING: sequences kernel namespaces make splitting math math.order fry assocs accessors ; IN: wrap @@ -15,12 +17,25 @@ SYMBOL: width : break-here? ( column word -- ? ) break?>> not [ width get > ] [ drop f ] if ; +: walk ( n words -- n ) + ! If on a break, take the rest of the breaks + ! If not on a break, go back until you hit a break + 2dup bounds-check? [ + 2dup nth break?>> + [ [ break?>> not ] find-from drop ] + [ [ break?>> ] find-last-from drop 1+ ] if + ] [ drop ] if ; + : find-optimal-break ( words -- n ) - [ 0 ] dip [ [ width>> + dup ] keep break-here? ] find drop nip ; + [ 0 ] keep + [ [ width>> + dup ] keep break-here? ] find drop nip + [ 1 max swap walk ] [ drop f ] if* ; : (wrap) ( words -- ) - dup find-optimal-break - [ 1 max cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if* ; + [ + dup find-optimal-break + [ cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if* + ] unless-empty ; : intersperse ( seq elt -- seq' ) [ '[ _ , ] [ , ] interleave ] { } make ; @@ -34,9 +49,7 @@ SYMBOL: width : join-words ( wrapped-lines -- lines ) [ - [ break?>> ] - [ trim-head-slice ] - [ trim-tail-slice ] bi + [ break?>> ] trim-slice [ key>> ] map concat ] map ; diff --git a/basis/xml/tests/encodings.factor b/basis/xml/tests/encodings.factor index 720b04ca42..35076d2930 100644 --- a/basis/xml/tests/encodings.factor +++ b/basis/xml/tests/encodings.factor @@ -1,4 +1,5 @@ -USING: xml xml.data xml.utilities tools.test accessors kernel ; +USING: xml xml.data xml.utilities tools.test accessors kernel +io.encodings.8-bit ; [ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test [ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test @@ -11,4 +12,4 @@ USING: xml xml.data xml.utilities tools.test accessors kernel ; [ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test [ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test [ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test -[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test \ No newline at end of file +[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test diff --git a/core/byte-arrays/byte-arrays-tests.factor b/core/byte-arrays/byte-arrays-tests.factor index edaea108a1..1c3e4d3bdf 100644 --- a/core/byte-arrays/byte-arrays-tests.factor +++ b/core/byte-arrays/byte-arrays-tests.factor @@ -9,3 +9,5 @@ USING: tools.test byte-arrays sequences kernel ; [ B{ 1 2 } ] [ 2 B{ 1 2 3 4 5 6 7 8 9 } resize-byte-array ] unit-test [ -10 B{ } resize-byte-array ] must-fail + +[ B{ 123 } ] [ 123 1byte-array ] unit-test \ No newline at end of file diff --git a/core/byte-arrays/byte-arrays.factor b/core/byte-arrays/byte-arrays.factor index f0d188ce4a..72989ac447 100644 --- a/core/byte-arrays/byte-arrays.factor +++ b/core/byte-arrays/byte-arrays.factor @@ -1,4 +1,4 @@ -! Copyright (C) 2007, 2008 Slava Pestov. +! Copyright (C) 2007, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: accessors kernel kernel.private alien.accessors sequences sequences.private math ; @@ -19,7 +19,7 @@ M: byte-array resize INSTANCE: byte-array sequence -: 1byte-array ( x -- byte-array ) 1 [ set-first ] keep ; inline +: 1byte-array ( x -- byte-array ) B{ } 1sequence ; inline : 2byte-array ( x y -- byte-array ) B{ } 2sequence ; inline diff --git a/core/combinators/combinators-tests.factor b/core/combinators/combinators-tests.factor index 5a56d2b636..1a73e22e31 100644 --- a/core/combinators/combinators-tests.factor +++ b/core/combinators/combinators-tests.factor @@ -1,6 +1,6 @@ USING: alien strings kernel math tools.test io prettyprint namespaces combinators words classes sequences accessors -math.functions ; +math.functions arrays ; IN: combinators.tests ! Compiled @@ -314,3 +314,13 @@ IN: combinators.tests \ test-case-7 must-infer [ "plus" ] [ \ + test-case-7 ] unit-test + +! Some corner cases (no pun intended) +DEFER: corner-case-1 + +<< \ corner-case-1 2 [ + ] curry 1array [ case ] curry (( a -- b )) define-declared >> + +[ t ] [ \ corner-case-1 optimized>> ] unit-test +[ 4 ] [ 2 corner-case-1 ] unit-test + +[ 4 ] [ 2 2 [ + ] curry 1array case ] unit-test \ No newline at end of file diff --git a/core/combinators/combinators.factor b/core/combinators/combinators.factor index c4c18c1c62..e356a6d246 100755 --- a/core/combinators/combinators.factor +++ b/core/combinators/combinators.factor @@ -59,13 +59,13 @@ ERROR: no-case ; ] [ dup wrapper? [ wrapped>> ] when ] if = - ] [ quotation? ] if + ] [ callable? ] if ] find nip ; : case ( obj assoc -- ) case-find { { [ dup array? ] [ nip second call ] } - { [ dup quotation? ] [ call ] } + { [ dup callable? ] [ call ] } { [ dup not ] [ no-case ] } } cond ; diff --git a/core/continuations/continuations-docs.factor b/core/continuations/continuations-docs.factor index 2cc44bee1b..2bf59f7780 100644 --- a/core/continuations/continuations-docs.factor +++ b/core/continuations/continuations-docs.factor @@ -246,8 +246,8 @@ HELP: retry { $description "Tries the quotation up to " { $snippet "n" } " times until it returns true. Retries the quotation if an exception is thrown or if the quotation returns " { $link f } ". The quotation is expected to have side effects that may fail, such as generating a random name for a new file until successful." } { $examples "Try to get a 0 as a random number:" - { $unchecked-example "USING: continuations math prettyprint ;" - "[ 5 random 0 = ] 5 retry t" + { $unchecked-example "USING: continuations math prettyprint random ;" + "[ 5 random 0 = ] 5 retry" "t" } } ; diff --git a/core/sequences/sequences-docs.factor b/core/sequences/sequences-docs.factor index f213be4fe7..6ca782a202 100755 --- a/core/sequences/sequences-docs.factor +++ b/core/sequences/sequences-docs.factor @@ -207,6 +207,10 @@ HELP: first4-unsafe { $values { "seq" sequence } { "first" "the first element" } { "second" "the second element" } { "third" "the third element" } { "fourth" "the fourth element" } } { $contract "Unsafe variant of " { $link first4 } " that does not perform bounds checks." } ; +HELP: 1sequence +{ $values { "obj" object } { "exemplar" sequence } { "seq" sequence } } +{ $description "Creates a one-element sequence of the same type as " { $snippet "exemplar" } "." } ; + HELP: 2sequence { $values { "obj1" object } { "obj2" object } { "exemplar" sequence } { "seq" sequence } } { $description "Creates a two-element sequence of the same type as " { $snippet "exemplar" } "." } ; diff --git a/core/sequences/sequences.factor b/core/sequences/sequences.factor index 2c30a62fe3..9e64cfa536 100755 --- a/core/sequences/sequences.factor +++ b/core/sequences/sequences.factor @@ -137,9 +137,12 @@ INSTANCE: iota immutable-sequence : from-end ( seq n -- seq n' ) [ dup length ] dip - ; inline +: (1sequence) ( obj seq -- seq ) + [ 0 swap set-nth-unsafe ] keep ; inline + : (2sequence) ( obj1 obj2 seq -- seq ) [ 1 swap set-nth-unsafe ] keep - [ 0 swap set-nth-unsafe ] keep ; inline + (1sequence) ; inline : (3sequence) ( obj1 obj2 obj3 seq -- seq ) [ 2 swap set-nth-unsafe ] keep @@ -151,6 +154,9 @@ INSTANCE: iota immutable-sequence PRIVATE> +: 1sequence ( obj exemplar -- seq ) + 1 swap [ (1sequence) ] new-like ; inline + : 2sequence ( obj1 obj2 exemplar -- seq ) 2 swap [ (2sequence) ] new-like ; inline diff --git a/core/vectors/vectors-tests.factor b/core/vectors/vectors-tests.factor index f2e29d79e8..12e2ea49f7 100644 --- a/core/vectors/vectors-tests.factor +++ b/core/vectors/vectors-tests.factor @@ -97,3 +97,5 @@ IN: vectors.tests [ fixnum ] [ 1 >bignum V{ } new-sequence length class ] unit-test [ fixnum ] [ 1 >bignum [ ] V{ } map-as length class ] unit-test + +[ V{ "lulz" } ] [ "lulz" 1vector ] unit-test \ No newline at end of file diff --git a/core/vectors/vectors.factor b/core/vectors/vectors.factor index a6bfef71d0..1bdda7b69d 100644 --- a/core/vectors/vectors.factor +++ b/core/vectors/vectors.factor @@ -40,7 +40,7 @@ M: sequence new-resizable drop ; INSTANCE: vector growable -: 1vector ( x -- vector ) 1array >vector ; +: 1vector ( x -- vector ) V{ } 1sequence ; : ?push ( elt seq/f -- seq ) [ 1 ] unless* [ push ] keep ; diff --git a/extra/reports/noise/noise.factor b/extra/reports/noise/noise.factor index 1ce7f9c726..3e47adac0b 100755 --- a/extra/reports/noise/noise.factor +++ b/extra/reports/noise/noise.factor @@ -53,7 +53,6 @@ IN: reports.noise { nipd 3 } { nkeep 5 } { npick 6 } - { nrev 5 } { nrot 5 } { nslip 5 } { ntuck 6 }