Merge branch 'master' of git://factorcode.org/git/factor

db4
John Benediktsson 2009-02-04 07:42:46 -08:00
commit 7282010cec
29 changed files with 342 additions and 145 deletions

View File

@ -3,17 +3,11 @@
USING: accessors kernel combinators math namespaces make assocs USING: accessors kernel combinators math namespaces make assocs
sequences splitting sorting sets strings vectors hashtables sequences splitting sorting sets strings vectors hashtables
quotations arrays byte-arrays math.parser calendar quotations arrays byte-arrays math.parser calendar
calendar.format present urls calendar.format present urls fry
io io.encodings io.encodings.iana io.encodings.binary io io.encodings io.encodings.iana io.encodings.binary
io.encodings.8-bit io.crlf io.encodings.8-bit io.crlf
unicode.case unicode.categories unicode.case unicode.categories
http.parsers ; http.parsers ;
EXCLUDE: fry => , ;
IN: http IN: http
: (read-header) ( -- alist ) : (read-header) ( -- alist )
@ -217,5 +211,7 @@ TUPLE: post-data data params content-type content-encoding ;
" " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ; " " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ;
: parse-content-type ( content-type -- type encoding ) : parse-content-type ( content-type -- type encoding )
";" split1 parse-content-type-attributes "charset" swap at ";" split1
name>encoding over "text/" head? latin1 binary ? or ; parse-content-type-attributes "charset" swap at
[ name>encoding ]
[ dup "text/" head? latin1 binary ? ] if* ;

View File

@ -1,6 +1,21 @@
USING: http http.server math sequences continuations tools.test ; USING: http http.server math sequences continuations tools.test
io.encodings.utf8 io.encodings.binary accessors ;
IN: http.server.tests IN: http.server.tests
[ t ] [ [ \ + first ] [ <500> ] recover response? ] unit-test [ t ] [ [ \ + first ] [ <500> ] recover response? ] unit-test
\ make-http-error must-infer \ make-http-error must-infer
[ "text/plain; charset=UTF-8" ] [
<response>
"text/plain" >>content-type
utf8 >>content-charset
unparse-content-type
] unit-test
[ "text/xml" ] [
<response>
"text/xml" >>content-type
binary >>content-charset
unparse-content-type
] unit-test

View File

@ -97,10 +97,8 @@ GENERIC: write-full-response ( request response -- )
tri ; tri ;
: unparse-content-type ( request -- content-type ) : unparse-content-type ( request -- content-type )
[ content-type>> "application/octet-stream" or ] [ content-type>> "application/octet-stream" or ] [ content-charset>> ] bi
[ content-charset>> encoding>name ] dup binary eq? [ drop ] [ encoding>name "; charset=" glue ] if ;
bi
[ "; charset=" glue ] when* ;
: ensure-domain ( cookie -- cookie ) : ensure-domain ( cookie -- cookie )
[ [

View File

@ -3,31 +3,33 @@
USING: math.parser arrays io.encodings sequences kernel assocs USING: math.parser arrays io.encodings sequences kernel assocs
hashtables io.encodings.ascii generic parser classes.tuple words hashtables io.encodings.ascii generic parser classes.tuple words
words.symbol io io.files splitting namespaces math words.symbol io io.files splitting namespaces math
compiler.units accessors ; compiler.units accessors classes.singleton classes.mixin
io.encodings.iana ;
IN: io.encodings.8-bit IN: io.encodings.8-bit
<PRIVATE <PRIVATE
: mappings { : mappings {
{ "latin1" "8859-1" } ! encoding-name iana-name file-name
{ "latin2" "8859-2" } { "latin1" "ISO_8859-1:1987" "8859-1" }
{ "latin3" "8859-3" } { "latin2" "ISO_8859-2:1987" "8859-2" }
{ "latin4" "8859-4" } { "latin3" "ISO_8859-3:1988" "8859-3" }
{ "latin/cyrillic" "8859-5" } { "latin4" "ISO_8859-4:1988" "8859-4" }
{ "latin/arabic" "8859-6" } { "latin/cyrillic" "ISO_8859-5:1988" "8859-5" }
{ "latin/greek" "8859-7" } { "latin/arabic" "ISO_8859-6:1987" "8859-6" }
{ "latin/hebrew" "8859-8" } { "latin/greek" "ISO_8859-7:1987" "8859-7" }
{ "latin5" "8859-9" } { "latin/hebrew" "ISO_8859-8:1988" "8859-8" }
{ "latin6" "8859-10" } { "latin5" "ISO_8859-9:1989" "8859-9" }
{ "latin/thai" "8859-11" } { "latin6" "ISO-8859-10" "8859-10" }
{ "latin7" "8859-13" } { "latin/thai" "TIS-620" "8859-11" }
{ "latin8" "8859-14" } { "latin7" "ISO-8859-13" "8859-13" }
{ "latin9" "8859-15" } { "latin8" "ISO-8859-14" "8859-14" }
{ "latin10" "8859-16" } { "latin9" "ISO-8859-15" "8859-15" }
{ "koi8-r" "KOI8-R" } { "latin10" "ISO-8859-16" "8859-16" }
{ "windows-1252" "CP1252" } { "koi8-r" "KOI8-R" "KOI8-R" }
{ "ebcdic" "CP037" } { "windows-1252" "windows-1252" "CP1252" }
{ "mac-roman" "ROMAN" } { "ebcdic" "IBM037" "CP037" }
{ "mac-roman" "macintosh" "ROMAN" }
} ; } ;
: encoding-file ( file-name -- stream ) : encoding-file ( file-name -- stream )
@ -65,8 +67,7 @@ M: 8-bit encode-char encode>> encode-8-bit ;
M: 8-bit decode-char decode>> decode-8-bit ; M: 8-bit decode-char decode>> decode-8-bit ;
PREDICATE: 8-bit-encoding < word MIXIN: 8-bit-encoding
8-bit-encodings get-global key? ;
M: 8-bit-encoding <encoder> M: 8-bit-encoding <encoder>
8-bit-encodings get-global at <encoder> ; 8-bit-encodings get-global at <encoder> ;
@ -74,15 +75,21 @@ M: 8-bit-encoding <encoder>
M: 8-bit-encoding <decoder> M: 8-bit-encoding <decoder>
8-bit-encodings get-global at <decoder> ; 8-bit-encodings get-global at <decoder> ;
: create-encoding ( name -- word )
"io.encodings.8-bit" create
[ define-singleton-class ]
[ 8-bit-encoding add-mixin-instance ]
[ ] tri ;
PRIVATE> PRIVATE>
[ [
mappings [ mappings [
[ "io.encodings.8-bit" create ] first3
[ create-encoding ]
[ dupd register-encoding ]
[ encoding-file parse-file 8-bit boa ] [ encoding-file parse-file 8-bit boa ]
bi* tri*
] assoc-map ] H{ } map>assoc
[ keys [ define-symbol ] each ] 8-bit-encodings set-global
[ 8-bit-encodings set-global ]
bi
] with-compilation-unit ] with-compilation-unit

View File

@ -1,6 +1,6 @@
! Copyright (C) 2008 Daniel Ehrenberg. ! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: io io.encodings kernel math io.encodings.private ; USING: io io.encodings kernel math io.encodings.private io.encodings.iana ;
IN: io.encodings.ascii IN: io.encodings.ascii
<PRIVATE <PRIVATE
@ -20,3 +20,5 @@ M: ascii encode-char
M: ascii decode-char M: ascii decode-char
128 decode-if< ; 128 decode-if< ;
ascii "ANSI_X3.4-1968" register-encoding

View File

@ -1,13 +1,15 @@
! Copyright (C) 2009 Daniel Ehrenberg ! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: xml xml.data kernel io io.encodings interval-maps splitting fry USING: xml xml.data kernel io io.encodings interval-maps splitting fry
math.parser sequences combinators assocs locals accessors math math.parser sequences combinators assocs locals accessors math arrays
arrays values io.encodings.ascii ascii io.files biassocs math.order byte-arrays values io.encodings.ascii ascii io.files biassocs
combinators.short-circuit io.binary ; math.order combinators.short-circuit io.binary io.encodings.iana ;
IN: io.encodings.chinese IN: io.encodings.chinese
SINGLETON: gb18030 SINGLETON: gb18030
gb18030 "GB18030" register-encoding
<PRIVATE <PRIVATE
! GB to mean GB18030 is a terrible abuse of notation ! GB to mean GB18030 is a terrible abuse of notation
@ -15,6 +17,14 @@ SINGLETON: gb18030
! Resource file from: ! Resource file from:
! http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml ! http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
! Algorithms from:
! http://www-128.ibm.com/developerworks/library/u-china.html
: linear ( bytes -- num )
! This hard-codes bMin and bMax
reverse first4
10 * + 126 * + 10 * + ; foldable
TUPLE: range ufirst ulast bfirst blast ; TUPLE: range ufirst ulast bfirst blast ;
: b>byte-array ( string -- byte-array ) : b>byte-array ( string -- byte-array )
@ -25,8 +35,8 @@ TUPLE: range ufirst ulast bfirst blast ;
{ {
[ "uFirst" attr hex> ] [ "uFirst" attr hex> ]
[ "uLast" attr hex> ] [ "uLast" attr hex> ]
[ "bFirst" attr b>byte-array ] [ "bFirst" attr b>byte-array linear ]
[ "bLast" attr b>byte-array ] [ "bLast" attr b>byte-array linear ]
} cleave range boa } cleave range boa
] dip push ; ] dip push ;
@ -49,21 +59,13 @@ TUPLE: range ufirst ulast bfirst blast ;
] each-element mapping ranges ] each-element mapping ranges
] ; ] ;
! Algorithms from:
! http://www-128.ibm.com/developerworks/library/u-china.html
: linear ( bytes -- num )
! This hard-codes bMin and bMax
reverse first4
10 * + 126 * + 10 * + ;
: unlinear ( num -- bytes ) : unlinear ( num -- bytes )
B{ HEX: 81 HEX: 30 HEX: 81 HEX: 30 } linear - B{ HEX: 81 HEX: 30 HEX: 81 HEX: 30 } linear -
10 /mod swap [ HEX: 30 + ] dip 10 /mod HEX: 30 + swap
126 /mod swap [ HEX: 81 + ] dip 126 /mod HEX: 81 + swap
10 /mod swap [ HEX: 30 + ] dip 10 /mod HEX: 30 + swap
HEX: 81 + HEX: 81 +
B{ } 4sequence reverse ; 4byte-array dup reverse-here ;
: >interval-map-by ( start-quot end-quot value-quot seq -- interval-map ) : >interval-map-by ( start-quot end-quot value-quot seq -- interval-map )
'[ _ [ @ 2array ] _ tri ] { } map>assoc <interval-map> ; inline '[ _ [ @ 2array ] _ tri ] { } map>assoc <interval-map> ; inline
@ -72,7 +74,7 @@ TUPLE: range ufirst ulast bfirst blast ;
[ ufirst>> ] [ ulast>> ] [ ] >interval-map-by ; [ ufirst>> ] [ ulast>> ] [ ] >interval-map-by ;
: ranges-gb>u ( ranges -- interval-map ) : ranges-gb>u ( ranges -- interval-map )
[ bfirst>> linear ] [ blast>> linear ] [ ] >interval-map-by ; [ bfirst>> ] [ blast>> ] [ ] >interval-map-by ;
VALUE: gb>u VALUE: gb>u
VALUE: u>gb VALUE: u>gb
@ -85,7 +87,7 @@ ascii <file-reader> xml>gb-data
: lookup-range ( char -- byte-array ) : lookup-range ( char -- byte-array )
dup u>gb interval-at [ dup u>gb interval-at [
[ ufirst>> - ] [ bfirst>> linear ] bi + unlinear [ ufirst>> - ] [ bfirst>> ] bi + unlinear
] [ encode-error ] if* ; ] [ encode-error ] if* ;
M: gb18030 encode-char ( char stream encoding -- ) M: gb18030 encode-char ( char stream encoding -- )
@ -107,19 +109,19 @@ M: gb18030 encode-char ( char stream encoding -- )
: decode-quad ( byte-array -- char ) : decode-quad ( byte-array -- char )
dup mapping value-at [ ] [ dup mapping value-at [ ] [
linear dup gb>u interval-at [ linear dup gb>u interval-at [
[ bfirst>> linear - ] [ ufirst>> ] bi + [ bfirst>> - ] [ ufirst>> ] bi +
] [ drop replacement-char ] if* ] [ drop replacement-char ] if*
] ?if ; ] ?if ;
: four-byte ( stream byte1 byte2 -- char ) : four-byte ( stream byte1 byte2 -- char )
rot 2 swap stream-read dup last-bytes? rot 2 swap stream-read dup last-bytes?
[ first2 B{ } 4sequence decode-quad ] [ first2 4byte-array decode-quad ]
[ 3drop replacement-char ] if ; [ 3drop replacement-char ] if ;
: two-byte ( stream byte -- char ) : two-byte ( stream byte -- char )
over stream-read1 { over stream-read1 {
{ [ dup not ] [ 3drop replacement-char ] } { [ dup not ] [ 3drop replacement-char ] }
{ [ dup second-byte? ] [ B{ } 2sequence mapping value-at nip ] } { [ dup second-byte? ] [ 2byte-array mapping value-at nip ] }
{ [ dup quad-2/4? ] [ four-byte ] } { [ dup quad-2/4? ] [ four-byte ] }
[ 3drop replacement-char ] [ 3drop replacement-char ]
} cond ; } cond ;
@ -127,7 +129,7 @@ M: gb18030 encode-char ( char stream encoding -- )
M: gb18030 decode-char ( stream encoding -- char ) M: gb18030 decode-char ( stream encoding -- char )
drop dup stream-read1 { drop dup stream-read1 {
{ [ dup not ] [ 2drop f ] } { [ dup not ] [ 2drop f ] }
{ [ dup ascii? ] [ nip 1array B{ } like mapping value-at ] } { [ dup ascii? ] [ nip 1byte-array mapping value-at ] }
{ [ dup quad-1/3? ] [ two-byte ] } { [ dup quad-1/3? ] [ two-byte ] }
[ 2drop replacement-char ] [ 2drop replacement-char ]
} cond ; } cond ;

View File

@ -1,12 +1,35 @@
USING: help.syntax help.markup ; USING: help.syntax help.markup strings ;
IN: io.encodings.iana IN: io.encodings.iana
ABOUT: "io.encodings.iana"
ARTICLE: "io.encodings.iana" "IANA-registered encoding names"
"The " { $vocab-link "io.encodings.iana" } " vocabulary provides words for accessing the names of encodings and the encoding descriptors corresponding to names." $nl
"Most text encodings in common use have been registered with IANA. There is a standard set of names for each encoding. Simple conversion functions:"
{ $subsection name>encoding }
{ $subsection encoding>name }
"To let a new encoding be used with the above words, use the following:"
{ $subsection register-encoding }
"Exceptions when encodings or names are not found:"
{ $subsection missing-encoding }
{ $subsection missing-name } ;
HELP: missing-encoding
{ $error-description "The error called from " { $link name>encoding } " when there is no encoding descriptor registered corresponding to the given name." } ;
HELP: missing-name
{ $error-description "The error called from " { $link encoding>name } " when there is no name registered corresponding to the given encoding." } ;
HELP: name>encoding HELP: name>encoding
{ $values { "name" "an encoding name" } { "encoding" "an encoding descriptor" } } { $values { "name" "an encoding name" } { "encoding" "an encoding descriptor" } }
{ "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ; { $description "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ;
HELP: encoding>name HELP: encoding>name
{ $values { "encoding" "an encoding descriptor" } { "name" "an encoding name" } } { $values { "encoding" "an encoding descriptor" } { "name" "an encoding name" } }
{ "Given an encoding descriptor, return the preferred IANA name." } ; { $description "Given an encoding descriptor, return the preferred IANA name." } ;
{ name>encoding encoding>name } related-words { name>encoding encoding>name } related-words
HELP: register-encoding
{ $values { "descriptor" "an encoding descriptor" } { "name" string } }
{ $description "Registers an encoding descriptor with the given name, available for lookup through " { $link name>encoding } " and " { $link encoding>name } ". IANA-registered aliases are automatically included. The name given must be the first name in the " { $snippet "resources:basis/io/encodings/iana/character-sets" } " file." } ;

View File

@ -1,5 +1,28 @@
USING: io.encodings.iana io.encodings.ascii tools.test ; USING: io.encodings.iana io.encodings.iana.private
io.encodings.utf8 tools.test assocs namespaces ;
IN: io.encodings.iana.tests
[ ascii ] [ "US-ASCII" name>encoding ] unit-test [ utf8 ] [ "UTF-8" name>encoding ] unit-test
[ ascii ] [ "ASCII" name>encoding ] unit-test [ utf8 ] [ "utf8" name>encoding ] unit-test
[ "US-ASCII" ] [ ascii encoding>name ] unit-test [ "UTF-8" ] [ utf8 encoding>name ] unit-test
! We will never implement EBCDIC-FI-SE-A
SINGLETON: ebcdic-fisea
ebcdic-fisea "EBCDIC-FI-SE-A" register-encoding
[ ebcdic-fisea ] [ "EBCDIC-FI-SE-A" name>encoding ] unit-test
[ ebcdic-fisea ] [ "csEBCDICFISEA" name>encoding ] unit-test
[ "EBCDIC-FI-SE-A" ] [ ebcdic-fisea encoding>name ] unit-test
! Clean up after myself
[ ] [
"EBCDIC-FI-SE-A" n>e-table get delete-at
"csEBCDICFISEA" n>e-table get delete-at
ebcdic-fisea e>n-table get delete-at
] unit-test
[ "EBCDIC-FI-SE-A" name>encoding ] must-fail
[ "csEBCDICFISEA" name>encoding ] must-fail
[ ebcdic-fisea encoding>name ] must-fail
[ ebcdic-fisea "foobar" register-encoding ] must-fail
[ "foobar" name>encoding ] must-fail
[ ebcdic-fisea encoding>name ] must-fail

View File

@ -1,41 +1,24 @@
! Copyright (C) 2008 Daniel Ehrenberg ! Copyright (C) 2008 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel strings values io.files assocs USING: kernel strings values io.files assocs
splitting sequences io namespaces sets io.encodings.8-bit splitting sequences io namespaces sets io.encodings.utf8 ;
io.encodings.ascii io.encodings.utf8 io.encodings.utf16
io.encodings.chinese io.encodings.japanese ;
IN: io.encodings.iana IN: io.encodings.iana
<PRIVATE <PRIVATE
VALUE: n>e-table SYMBOL: n>e-table
SYMBOL: e>n-table
: e>n-table H{ SYMBOL: aliases
{ ascii "US-ASCII" }
{ utf8 "UTF-8" }
{ utf16 "UTF-16" }
{ utf16be "UTF-16BE" }
{ utf16le "UTF-16LE" }
{ latin1 "ISO-8859-1" }
{ latin2 "ISO-8859-2" }
{ latin3 "ISO-8859-3" }
{ latin4 "ISO-8859-4" }
{ latin/cyrillic "ISO-8859-5" }
{ latin/arabic "ISO-8859-6" }
{ latin/greek "ISO-8859-7" }
{ latin/hebrew "ISO-8859-8" }
{ latin5 "ISO-8859-9" }
{ latin6 "ISO-8859-10" }
{ shift-jis "Shift_JIS" }
{ windows-31j "Windows-31J" }
{ gb18030 "GB18030" }
} ;
PRIVATE> PRIVATE>
ERROR: missing-encoding name ;
: name>encoding ( name -- encoding ) : name>encoding ( name -- encoding )
n>e-table at ; dup n>e-table get-global at [ ] [ missing-encoding ] ?if ;
ERROR: missing-name encoding ;
: encoding>name ( encoding -- name ) : encoding>name ( encoding -- name )
e>n-table at ; dup e>n-table get-global at [ ] [ missing-name ] ?if ;
<PRIVATE <PRIVATE
: parse-iana ( stream -- synonym-set ) : parse-iana ( stream -- synonym-set )
@ -43,24 +26,33 @@ PRIVATE>
[ " " split ] map [ " " split ] map
[ first { "Name:" "Alias:" } member? ] filter [ first { "Name:" "Alias:" } member? ] filter
[ second ] map { "None" } diff [ second ] map { "None" } diff
] map ; ] map harvest ;
: more-aliases ( -- assoc ) : make-aliases ( stream -- n>e )
parse-iana [ [ first ] [ ] bi ] H{ } map>assoc ;
: initial-n>e ( -- assoc )
H{ H{
{ "UTF8" utf8 } { "UTF8" utf8 }
{ "utf8" utf8 } { "utf8" utf8 }
{ "utf-8" utf8 } { "utf-8" utf8 }
} ; { "UTF-8" utf8 }
} clone ;
: initial-e>n ( -- assoc )
H{ { utf8 "UTF-8" } } clone ;
: make-n>e ( stream -- n>e )
parse-iana [ [
dup [
e>n-table value-at
[ swap [ set ] with each ]
[ drop ] if*
] with each
] each ] H{ } make-assoc more-aliases assoc-union ;
PRIVATE> PRIVATE>
"resource:basis/io/encodings/iana/character-sets" "resource:basis/io/encodings/iana/character-sets"
ascii <file-reader> make-n>e to: n>e-table utf8 <file-reader> make-aliases aliases set-global
n>e-table global [ initial-n>e or ] change-at
e>n-table global [ initial-e>n or ] change-at
: register-encoding ( descriptor name -- )
[
aliases get at [
[ n>e-table get-global set-at ] with each
] [ "Bad encoding registration" throw ] if*
] [ swap e>n-table get-global set-at ] 2bi ;

View File

@ -4,7 +4,7 @@ USING: help.markup help.syntax ;
IN: io.encodings.japanese IN: io.encodings.japanese
ARTICLE: "io.encodings.japanese" "Japanese text encodings" ARTICLE: "io.encodings.japanese" "Japanese text encodings"
"The " { $vocab-link "io.encodings.japanese" } " vocabulary implements Japanese-specific text encodings. Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete." "Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete."
{ $subsection shift-jis } { $subsection shift-jis }
{ $subsection windows-31j } ; { $subsection windows-31j } ;

View File

@ -1,17 +1,31 @@
! Copyright (C) 2009 Daniel Ehrenberg ! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: sequences kernel io io.files combinators.short-circuit USING: sequences kernel io io.files combinators.short-circuit
math.order values assocs io.encodings io.binary fry strings math.order values assocs io.encodings io.binary fry strings math
math io.encodings.ascii arrays accessors splitting math.parser io.encodings.ascii arrays byte-arrays accessors splitting
biassocs ; math.parser biassocs io.encodings.iana ;
IN: io.encodings.japanese IN: io.encodings.japanese
VALUE: shift-jis SINGLETON: shift-jis
VALUE: windows-31j shift-jis "Shift_JIS" register-encoding
SINGLETON: windows-31j
windows-31j "Windows-31J" register-encoding
<PRIVATE <PRIVATE
VALUE: shift-jis-table
M: shift-jis <encoder> drop shift-jis-table <encoder> ;
M: shift-jis <decoder> drop shift-jis-table <decoder> ;
VALUE: windows-31j-table
M: windows-31j <encoder> drop windows-31j-table <encoder> ;
M: windows-31j <decoder> drop windows-31j-table <decoder> ;
TUPLE: jis assoc ; TUPLE: jis assoc ;
: <jis> ( assoc -- jis ) : <jis> ( assoc -- jis )
@ -31,17 +45,17 @@ TUPLE: jis assoc ;
ascii file-lines process-jis <jis> ; ascii file-lines process-jis <jis> ;
"resource:basis/io/encodings/japanese/CP932.txt" "resource:basis/io/encodings/japanese/CP932.txt"
make-jis to: windows-31j make-jis to: windows-31j-table
"resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt" "resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt"
make-jis to: shift-jis make-jis to: shift-jis-table
: small? ( char -- ? ) : small? ( char -- ? )
! ASCII range or single-byte halfwidth katakana ! ASCII range or single-byte halfwidth katakana
{ [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ; { [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ;
: write-halfword ( stream halfword -- ) : write-halfword ( stream halfword -- )
h>b/b swap B{ } 2sequence swap stream-write ; h>b/b swap 2byte-array swap stream-write ;
M: jis encode-char M: jis encode-char
swapd ch>jis swapd ch>jis

View File

@ -1,15 +1,21 @@
! Copyright (C) 2006, 2008 Daniel Ehrenberg. ! Copyright (C) 2006, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors namespaces io.binary USING: math kernel sequences sbufs vectors namespaces io.binary
io.encodings combinators splitting io byte-arrays ; io.encodings combinators splitting io byte-arrays io.encodings.iana ;
IN: io.encodings.utf16 IN: io.encodings.utf16
SINGLETON: utf16be SINGLETON: utf16be
utf16be "UTF-16BE" register-encoding
SINGLETON: utf16le SINGLETON: utf16le
utf16le "UTF-16LE" register-encoding
SINGLETON: utf16 SINGLETON: utf16
utf16 "UTF-16" register-encoding
ERROR: missing-bom ; ERROR: missing-bom ;
<PRIVATE <PRIVATE

View File

@ -1,15 +1,21 @@
! Copyright (C) 2009 Daniel Ehrenberg. ! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: math kernel io.encodings combinators io io.encodings.utf16 USING: math kernel io.encodings combinators io io.encodings.utf16
sequences io.binary ; sequences io.binary io.encodings.iana ;
IN: io.encodings.utf32 IN: io.encodings.utf32
SINGLETON: utf32be SINGLETON: utf32be
utf32be "UTF-32BE" register-encoding
SINGLETON: utf32le SINGLETON: utf32le
utf32le "UTF-32LE" register-encoding
SINGLETON: utf32 SINGLETON: utf32
utf32 "UTF-32" register-encoding
<PRIVATE <PRIVATE
! Decoding ! Decoding

View File

@ -70,7 +70,7 @@ IN: stack-checker.transforms
[ [
[ no-case ] [ no-case ]
] [ ] [
dup peek quotation? [ dup peek callable? [
dup peek swap but-last dup peek swap but-last
] [ ] [
[ no-case ] swap [ no-case ] swap

View File

@ -1 +1,2 @@
Daniel Ehrenberg Daniel Ehrenberg
Slava Pestov

View File

@ -0,0 +1,41 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup strings math kernel ;
IN: wrap
ABOUT: "wrap"
ARTICLE: "wrap" "Word wrapping"
"The " { $vocab-link "wrap" } " vocabulary implements word wrapping. There is support for simple string wrapping, with the following words:"
{ $subsection wrap-lines }
{ $subsection wrap-string }
{ $subsection wrap-indented-string }
"Additionally, the vocabulary provides capabilities to wrap arbitrary groups of things, in units called words."
{ $subsection wrap }
{ $subsection word }
{ $subsection <word> } ;
HELP: wrap-lines
{ $values { "lines" string } { "width" integer } { "newlines" "sequence of strings" } }
{ $description "Given a string, divides it into a sequence of lines where each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ;
HELP: wrap-string
{ $values { "string" string } { "width" integer } { "newstring" string } }
{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space." } ;
HELP: wrap-indented-string
{ $values { "string" string } { "width" integer } { "indent" string } { "newstring" string } }
{ $description "Given a string, alters the whitespace in the string so that each line has no more than " { $snippet "width" } " characters, unless there is a word longer than " { $snippet "width" } ". Linear whitespace between words is converted to a single space. Before each line, the indent string is added." } ;
HELP: wrap
{ $values { "words" { "a sequence of " { $instance word } "s" } } { "width" integer } { "lines" "a sequence of sequences of words" } }
{ $description "Divides the words into lines, where the sum of the lengths of the words on a line (not counting breaks at the end of the line) is at most the given width. Every line except for the first one starts with a non-break, and every one but the last ends with a break." } ;
HELP: word
{ $class-description "A word, for the purposes of " { $vocab-link "wrap" } ", is a Factor object annotated with a length (in the " { $snippet "width" } " slot) and knowledge about whether it is an allowable position for an optional line break (in the " { $snippet "break?" } " slot). Words can be created with " { $link <word> } "." }
{ $see-also wrap } ;
HELP: <word>
{ $values { "key" object } { "width" integer } { "break?" { { $link t } " or " { $link POSTPONE: f } } } { "word" word } }
{ $description "Creates a " { $link word } " object with the given parameters." }
{ $see-also wrap } ;

View File

@ -1,5 +1,7 @@
IN: wrap.tests ! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test wrap multiline sequences ; USING: tools.test wrap multiline sequences ;
IN: wrap.tests
[ [
{ {
@ -23,6 +25,32 @@ USING: tools.test wrap multiline sequences ;
} 35 wrap [ { } like ] map } 35 wrap [ { } like ] map
] unit-test ] unit-test
[
{
{
T{ word f 1 10 f }
T{ word f 2 10 f }
T{ word f 3 9 t }
T{ word f 3 9 t }
T{ word f 3 9 t }
}
{
T{ word f 4 10 f }
T{ word f 5 10 f }
}
}
] [
{
T{ word f 1 10 f }
T{ word f 2 10 f }
T{ word f 3 9 t }
T{ word f 3 9 t }
T{ word f 3 9 t }
T{ word f 4 10 f }
T{ word f 5 10 f }
} 35 wrap [ { } like ] map
] unit-test
[ [
<" This is a <" This is a
long piece long piece
@ -46,3 +74,9 @@ word wrap.">
<" This is a long piece of text that we wish to word wrap."> 12 <" This is a long piece of text that we wish to word wrap."> 12
" " wrap-indented-string " " wrap-indented-string
] unit-test ] unit-test
[ "this text\nhas lots of\nspaces" ]
[ "this text has lots of spaces" 12 wrap-string ] unit-test
[ "hello\nhow\nare\nyou\ntoday?" ]
[ "hello how are you today?" 3 wrap-string ] unit-test

View File

@ -1,3 +1,5 @@
! Copyright (C) 2008, 2009 Daniel Ehrenberg, Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: sequences kernel namespaces make splitting USING: sequences kernel namespaces make splitting
math math.order fry assocs accessors ; math math.order fry assocs accessors ;
IN: wrap IN: wrap
@ -15,12 +17,25 @@ SYMBOL: width
: break-here? ( column word -- ? ) : break-here? ( column word -- ? )
break?>> not [ width get > ] [ drop f ] if ; break?>> not [ width get > ] [ drop f ] if ;
: walk ( n words -- n )
! If on a break, take the rest of the breaks
! If not on a break, go back until you hit a break
2dup bounds-check? [
2dup nth break?>>
[ [ break?>> not ] find-from drop ]
[ [ break?>> ] find-last-from drop 1+ ] if
] [ drop ] if ;
: find-optimal-break ( words -- n ) : find-optimal-break ( words -- n )
[ 0 ] dip [ [ width>> + dup ] keep break-here? ] find drop nip ; [ 0 ] keep
[ [ width>> + dup ] keep break-here? ] find drop nip
[ 1 max swap walk ] [ drop f ] if* ;
: (wrap) ( words -- ) : (wrap) ( words -- )
dup find-optimal-break [
[ 1 max cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if* ; dup find-optimal-break
[ cut-slice [ , ] [ (wrap) ] bi* ] [ , ] if*
] unless-empty ;
: intersperse ( seq elt -- seq' ) : intersperse ( seq elt -- seq' )
[ '[ _ , ] [ , ] interleave ] { } make ; [ '[ _ , ] [ , ] interleave ] { } make ;
@ -34,9 +49,7 @@ SYMBOL: width
: join-words ( wrapped-lines -- lines ) : join-words ( wrapped-lines -- lines )
[ [
[ break?>> ] [ break?>> ] trim-slice
[ trim-head-slice ]
[ trim-tail-slice ] bi
[ key>> ] map concat [ key>> ] map concat
] map ; ] map ;

View File

@ -1,4 +1,5 @@
USING: xml xml.data xml.utilities tools.test accessors kernel ; USING: xml xml.data xml.utilities tools.test accessors kernel
io.encodings.8-bit ;
[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test [ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test [ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test

View File

@ -9,3 +9,5 @@ USING: tools.test byte-arrays sequences kernel ;
[ B{ 1 2 } ] [ 2 B{ 1 2 3 4 5 6 7 8 9 } resize-byte-array ] unit-test [ B{ 1 2 } ] [ 2 B{ 1 2 3 4 5 6 7 8 9 } resize-byte-array ] unit-test
[ -10 B{ } resize-byte-array ] must-fail [ -10 B{ } resize-byte-array ] must-fail
[ B{ 123 } ] [ 123 1byte-array ] unit-test

View File

@ -1,4 +1,4 @@
! Copyright (C) 2007, 2008 Slava Pestov. ! Copyright (C) 2007, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel kernel.private alien.accessors sequences USING: accessors kernel kernel.private alien.accessors sequences
sequences.private math ; sequences.private math ;
@ -19,7 +19,7 @@ M: byte-array resize
INSTANCE: byte-array sequence INSTANCE: byte-array sequence
: 1byte-array ( x -- byte-array ) 1 <byte-array> [ set-first ] keep ; inline : 1byte-array ( x -- byte-array ) B{ } 1sequence ; inline
: 2byte-array ( x y -- byte-array ) B{ } 2sequence ; inline : 2byte-array ( x y -- byte-array ) B{ } 2sequence ; inline

View File

@ -1,6 +1,6 @@
USING: alien strings kernel math tools.test io prettyprint USING: alien strings kernel math tools.test io prettyprint
namespaces combinators words classes sequences accessors namespaces combinators words classes sequences accessors
math.functions ; math.functions arrays ;
IN: combinators.tests IN: combinators.tests
! Compiled ! Compiled
@ -314,3 +314,13 @@ IN: combinators.tests
\ test-case-7 must-infer \ test-case-7 must-infer
[ "plus" ] [ \ + test-case-7 ] unit-test [ "plus" ] [ \ + test-case-7 ] unit-test
! Some corner cases (no pun intended)
DEFER: corner-case-1
<< \ corner-case-1 2 [ + ] curry 1array [ case ] curry (( a -- b )) define-declared >>
[ t ] [ \ corner-case-1 optimized>> ] unit-test
[ 4 ] [ 2 corner-case-1 ] unit-test
[ 4 ] [ 2 2 [ + ] curry 1array case ] unit-test

View File

@ -59,13 +59,13 @@ ERROR: no-case ;
] [ ] [
dup wrapper? [ wrapped>> ] when dup wrapper? [ wrapped>> ] when
] if = ] if =
] [ quotation? ] if ] [ callable? ] if
] find nip ; ] find nip ;
: case ( obj assoc -- ) : case ( obj assoc -- )
case-find { case-find {
{ [ dup array? ] [ nip second call ] } { [ dup array? ] [ nip second call ] }
{ [ dup quotation? ] [ call ] } { [ dup callable? ] [ call ] }
{ [ dup not ] [ no-case ] } { [ dup not ] [ no-case ] }
} cond ; } cond ;

View File

@ -246,8 +246,8 @@ HELP: retry
{ $description "Tries the quotation up to " { $snippet "n" } " times until it returns true. Retries the quotation if an exception is thrown or if the quotation returns " { $link f } ". The quotation is expected to have side effects that may fail, such as generating a random name for a new file until successful." } { $description "Tries the quotation up to " { $snippet "n" } " times until it returns true. Retries the quotation if an exception is thrown or if the quotation returns " { $link f } ". The quotation is expected to have side effects that may fail, such as generating a random name for a new file until successful." }
{ $examples { $examples
"Try to get a 0 as a random number:" "Try to get a 0 as a random number:"
{ $unchecked-example "USING: continuations math prettyprint ;" { $unchecked-example "USING: continuations math prettyprint random ;"
"[ 5 random 0 = ] 5 retry t" "[ 5 random 0 = ] 5 retry"
"t" "t"
} }
} ; } ;

View File

@ -207,6 +207,10 @@ HELP: first4-unsafe
{ $values { "seq" sequence } { "first" "the first element" } { "second" "the second element" } { "third" "the third element" } { "fourth" "the fourth element" } } { $values { "seq" sequence } { "first" "the first element" } { "second" "the second element" } { "third" "the third element" } { "fourth" "the fourth element" } }
{ $contract "Unsafe variant of " { $link first4 } " that does not perform bounds checks." } ; { $contract "Unsafe variant of " { $link first4 } " that does not perform bounds checks." } ;
HELP: 1sequence
{ $values { "obj" object } { "exemplar" sequence } { "seq" sequence } }
{ $description "Creates a one-element sequence of the same type as " { $snippet "exemplar" } "." } ;
HELP: 2sequence HELP: 2sequence
{ $values { "obj1" object } { "obj2" object } { "exemplar" sequence } { "seq" sequence } } { $values { "obj1" object } { "obj2" object } { "exemplar" sequence } { "seq" sequence } }
{ $description "Creates a two-element sequence of the same type as " { $snippet "exemplar" } "." } ; { $description "Creates a two-element sequence of the same type as " { $snippet "exemplar" } "." } ;

View File

@ -137,9 +137,12 @@ INSTANCE: iota immutable-sequence
: from-end ( seq n -- seq n' ) [ dup length ] dip - ; inline : from-end ( seq n -- seq n' ) [ dup length ] dip - ; inline
: (1sequence) ( obj seq -- seq )
[ 0 swap set-nth-unsafe ] keep ; inline
: (2sequence) ( obj1 obj2 seq -- seq ) : (2sequence) ( obj1 obj2 seq -- seq )
[ 1 swap set-nth-unsafe ] keep [ 1 swap set-nth-unsafe ] keep
[ 0 swap set-nth-unsafe ] keep ; inline (1sequence) ; inline
: (3sequence) ( obj1 obj2 obj3 seq -- seq ) : (3sequence) ( obj1 obj2 obj3 seq -- seq )
[ 2 swap set-nth-unsafe ] keep [ 2 swap set-nth-unsafe ] keep
@ -151,6 +154,9 @@ INSTANCE: iota immutable-sequence
PRIVATE> PRIVATE>
: 1sequence ( obj exemplar -- seq )
1 swap [ (1sequence) ] new-like ; inline
: 2sequence ( obj1 obj2 exemplar -- seq ) : 2sequence ( obj1 obj2 exemplar -- seq )
2 swap [ (2sequence) ] new-like ; inline 2 swap [ (2sequence) ] new-like ; inline

View File

@ -97,3 +97,5 @@ IN: vectors.tests
[ fixnum ] [ 1 >bignum V{ } new-sequence length class ] unit-test [ fixnum ] [ 1 >bignum V{ } new-sequence length class ] unit-test
[ fixnum ] [ 1 >bignum [ ] V{ } map-as length class ] unit-test [ fixnum ] [ 1 >bignum [ ] V{ } map-as length class ] unit-test
[ V{ "lulz" } ] [ "lulz" 1vector ] unit-test

View File

@ -40,7 +40,7 @@ M: sequence new-resizable drop <vector> ;
INSTANCE: vector growable INSTANCE: vector growable
: 1vector ( x -- vector ) 1array >vector ; : 1vector ( x -- vector ) V{ } 1sequence ;
: ?push ( elt seq/f -- seq ) : ?push ( elt seq/f -- seq )
[ 1 <vector> ] unless* [ push ] keep ; [ 1 <vector> ] unless* [ push ] keep ;

View File

@ -53,7 +53,6 @@ IN: reports.noise
{ nipd 3 } { nipd 3 }
{ nkeep 5 } { nkeep 5 }
{ npick 6 } { npick 6 }
{ nrev 5 }
{ nrot 5 } { nrot 5 }
{ nslip 5 } { nslip 5 }
{ ntuck 6 } { ntuck 6 }