Merge branch 'master' into new_ui
commit
32bde32018
|
@ -16,13 +16,22 @@ HELP: once-at
|
|||
{ $values { "value" object } { "key" object } { "assoc" assoc } }
|
||||
{ $description "If the assoc does not contain the given key, adds the key/value pair to the assoc, otherwise does nothing." } ;
|
||||
|
||||
HELP: >biassoc
|
||||
{ $values { "assoc" assoc } { "biassoc" biassoc } }
|
||||
{ $description "Constructs a new biassoc with the same key/value pairs as the given assoc." } ;
|
||||
|
||||
ARTICLE: "biassocs" "Bidirectional assocs"
|
||||
"A " { $emphasis "bidirectional assoc" } " combines a pair of assocs to form a data structure where both normal assoc operations (eg, " { $link at } "), as well as " { $link "assocs-values" } " (eg, " { $link value-at } ") run in sub-linear time."
|
||||
$nl
|
||||
"Bidirectional assocs implement the entire assoc protocol with the exception of " { $link delete-at } ". Duplicate values are allowed, however value lookups with " { $link value-at } " only return the first key that a given value was stored with."
|
||||
"Bidirectional assocs implement the entire " { $link "assoc-protocol" } " with the exception of " { $link delete-at } ". Duplicate values are allowed, however value lookups with " { $link value-at } " only return the first key that a given value was stored with."
|
||||
$nl
|
||||
"The class of biassocs:"
|
||||
{ $subsection biassoc }
|
||||
{ $subsection biassoc? }
|
||||
"Creating new biassocs:"
|
||||
{ $subsection <biassoc> }
|
||||
{ $subsection <bihash> } ;
|
||||
{ $subsection <bihash> }
|
||||
"Converting existing assocs to biassocs:"
|
||||
{ $subsection >biassoc } ;
|
||||
|
||||
ABOUT: "biassocs"
|
||||
|
|
|
@ -20,3 +20,13 @@ USING: biassocs assocs namespaces tools.test ;
|
|||
[ 2 ] [ 1 "h" get value-at ] unit-test
|
||||
|
||||
[ 2 ] [ "h" get assoc-size ] unit-test
|
||||
|
||||
H{ { "a" "A" } { "b" "B" } } "a" set
|
||||
|
||||
[ ] [ "a" get >biassoc "b" set ] unit-test
|
||||
|
||||
[ t ] [ "b" get biassoc? ] unit-test
|
||||
|
||||
[ "A" ] [ "a" "b" get at ] unit-test
|
||||
|
||||
[ "a" ] [ "A" "b" get value-at ] unit-test
|
|
@ -1,6 +1,6 @@
|
|||
! Copyright (C) 2008 Slava Pestov.
|
||||
! Copyright (C) 2008, 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel assocs accessors summary ;
|
||||
USING: kernel assocs accessors summary hashtables ;
|
||||
IN: biassocs
|
||||
|
||||
TUPLE: biassoc from to ;
|
||||
|
@ -37,4 +37,10 @@ M: biassoc >alist
|
|||
M: biassoc clear-assoc
|
||||
[ from>> clear-assoc ] [ to>> clear-assoc ] bi ;
|
||||
|
||||
M: biassoc new-assoc
|
||||
drop [ <hashtable> ] [ <hashtable> ] bi biassoc boa ;
|
||||
|
||||
INSTANCE: biassoc assoc
|
||||
|
||||
: >biassoc ( assoc -- biassoc )
|
||||
T{ biassoc } assoc-clone-like ;
|
|
@ -45,7 +45,7 @@ IN: io.encodings.8-bit
|
|||
: ch>byte ( assoc -- newassoc )
|
||||
[ swap ] assoc-map >hashtable ;
|
||||
|
||||
: parse-file ( path -- byte>ch ch>byte )
|
||||
: parse-file ( stream -- byte>ch ch>byte )
|
||||
lines process-contents
|
||||
[ byte>ch ] [ ch>byte ] bi ;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1 @@
|
|||
Daniel Ehrenberg
|
|
@ -0,0 +1,19 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.markup help.syntax ;
|
||||
IN: io.encodings.japanese
|
||||
|
||||
ARTICLE: "io.encodings.japanese" "Japanese text encodings"
|
||||
"Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 0208 standard. Current coverage of encodings is incomplete."
|
||||
{ $subsection shift-jis }
|
||||
{ $subsection windows-31j } ;
|
||||
|
||||
ABOUT: "io.encodings.japanese"
|
||||
|
||||
HELP: windows-31j
|
||||
{ $class-description "The encoding descriptor Windows-31J, which is sometimes informally called Shift JIS. This is based on Code Page 932." }
|
||||
{ $see-also "encodings-introduction" shift-jis } ;
|
||||
|
||||
HELP: shift-jis
|
||||
{ $class-description "The encoding descriptor for Shift JIS, or JIS X 0208:1997 Appendix 1. Microsoft extensions are not included." }
|
||||
{ $see-also "encodings-introduction" windows-31j } ;
|
|
@ -0,0 +1,17 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io.encodings.japanese tools.test io.encodings.string arrays strings ;
|
||||
IN: io.encodings.japanese.tests
|
||||
|
||||
[ { CHAR: replacement-character } ] [ { 141 } shift-jis decode >array ] unit-test
|
||||
[ "" ] [ "" shift-jis decode >string ] unit-test
|
||||
[ "" ] [ "" shift-jis encode >string ] unit-test
|
||||
[ { CHAR: replacement-character } shift-jis encode ] must-fail
|
||||
[ "ab¥ィ" ] [ { CHAR: a CHAR: b HEX: 5C HEX: A8 } shift-jis decode ] unit-test
|
||||
[ { CHAR: a CHAR: b HEX: 5C HEX: A8 } ] [ "ab¥ィ" shift-jis encode >array ] unit-test
|
||||
[ "ab\\ィ" ] [ { CHAR: a CHAR: b HEX: 5C HEX: A8 } windows-31j decode ] unit-test
|
||||
[ { CHAR: a CHAR: b HEX: 5C HEX: A8 } ] [ "ab\\ィ" windows-31j encode >array ] unit-test
|
||||
[ "\u000081\u0000c8" ] [ CHAR: logical-and 1string windows-31j encode >string ] unit-test
|
||||
[ "\u000081\u0000c8" ] [ CHAR: logical-and 1string shift-jis encode >string ] unit-test
|
||||
[ { CHAR: logical-and } ] [ "\u000081\u0000c8" windows-31j decode >array ] unit-test
|
||||
[ { CHAR: logical-and } ] [ "\u000081\u0000c8" shift-jis decode >array ] unit-test
|
|
@ -0,0 +1,61 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: sequences kernel io io.files combinators.short-circuit
|
||||
math.order values assocs io.encodings io.binary fry strings
|
||||
math io.encodings.ascii arrays accessors splitting math.parser
|
||||
biassocs ;
|
||||
IN: io.encodings.japanese
|
||||
|
||||
VALUE: shift-jis
|
||||
|
||||
VALUE: windows-31j
|
||||
|
||||
<PRIVATE
|
||||
|
||||
TUPLE: jis assoc ;
|
||||
|
||||
! Builds a jis tuple from an assoc.
! Entries whose value is f are filtered out, the remainder is coerced to a
! hashtable, and that is converted to a biassoc stored in the tuple's
! assoc slot.
: <jis> ( assoc -- jis )
|
||||
[ nip ] assoc-filter H{ } assoc-like
|
||||
>biassoc jis boa ;
|
||||
|
||||
! Reverse lookup: finds the key stored for ch using value-at on the tuple's
! assoc. If the lookup yields f, encode-error is called instead.
: ch>jis ( ch tuple -- jis ) assoc>> value-at [ encode-error ] unless* ;
|
||||
! Forward lookup: fetches the entry stored under the given code using at on
! the tuple's assoc, substituting replacement-char when the lookup yields f.
! NOTE(review): the output is named "string" in the stack effect, but the
! value comes from a single lookup or replacement-char -- confirm naming.
: jis>ch ( jis tuple -- string ) assoc>> at replacement-char or ;
|
||||
|
||||
! Turns the lines of a mapping table into a sequence of number pairs.
! For each line, everything from the first "#" onward is discarded, and
! lines left empty are removed (harvest). Each remaining line is split on
! tabs and only the first two fields are kept. The first two characters of
! each field are dropped before parsing the rest as hexadecimal
! (presumably a "0x" prefix -- confirm against the table files).
: process-jis ( lines -- assoc )
|
||||
[ "#" split1 drop ] map harvest [
|
||||
"\t" split 2 head
|
||||
[ 2 short tail hex> ] map
|
||||
] map ;
|
||||
|
||||
! Reads the named file as ASCII lines, parses them with process-jis and
! wraps the resulting table in a jis tuple.
: make-jis ( filename -- jis )
|
||||
ascii file-lines process-jis <jis> ;
|
||||
|
||||
! Top-level code: build a jis tuple from each mapping table file and store
! it in the corresponding value word (windows-31j, then shift-jis).
"resource:basis/io/encodings/japanese/CP932.txt"
|
||||
make-jis to: windows-31j
|
||||
|
||||
"resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt"
|
||||
make-jis to: shift-jis
|
||||
|
||||
! True when the number lies in 0..HEX: 7F or in HEX: A1..HEX: DF (both
! ranges inclusive). The encoder and decoder in this file use it to decide
! between the one-byte and two-byte forms.
: small? ( char -- ? )
|
||||
! ASCII range or single-byte halfwidth katakana
|
||||
{ [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ;
|
||||
|
||||
! Writes a 16-bit value to the stream as two bytes, high byte first.
! h>b/b leaves the low byte below the high byte on the stack, so the swap
! puts them in high/low order before they are packed into a byte array.
: write-halfword ( stream halfword -- )
|
||||
h>b/b swap B{ } 2sequence swap stream-write ;
|
||||
|
||||
! Encodes one character to the stream.
! The character is translated with ch>jis (which calls encode-error for an
! unmapped character). A code satisfying small? is written as a single
! byte; any other code is written as two bytes with write-halfword.
M: jis encode-char
|
||||
swapd ch>jis
|
||||
dup small?
|
||||
[ swap stream-write1 ]
|
||||
[ write-halfword ] if ;
|
||||
|
||||
! Decodes one character from the stream.
!   - first read yields f (nothing to read): result is f
!   - first byte satisfies small?: it is looked up directly with jis>ch
!   - otherwise a second byte is read; the two bytes are combined
!     big-endian (be>) and looked up with jis>ch
!   - second read yields f: result is replacement-char
M: jis decode-char
|
||||
swap dup stream-read1 [
|
||||
dup small? [ nip swap jis>ch ] [
|
||||
swap stream-read1
|
||||
[ 2array be> swap jis>ch ]
|
||||
[ 2drop replacement-char ] if*
|
||||
] if
|
||||
] [ 2drop f ] if* ;
|
||||
|
||||
PRIVATE>
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1 @@
|
|||
Japanese text encodings
|
|
@ -0,0 +1 @@
|
|||
text
|
|
@ -1,3 +1,5 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.markup help.syntax io.encodings strings ;
|
||||
IN: io.encodings.utf16
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel tools.test io.encodings.utf16 arrays sbufs
|
||||
io.streams.byte-array sequences io.encodings io
|
||||
bootstrap.unicode
|
||||
io.encodings.string alien.c-types alien.strings accessors classes ;
|
||||
IN: io.encodings.utf16.tests
|
||||
|
||||
|
@ -15,7 +16,6 @@ IN: io.encodings.utf16.tests
|
|||
[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } utf16le decode >array ] unit-test
|
||||
[ { CHAR: replacement-character } ] [ { 0 BIN: 11011111 } utf16le decode >array ] unit-test
|
||||
[ { CHAR: replacement-character } ] [ { 0 BIN: 11011011 0 0 } utf16le decode >array ] unit-test
|
||||
[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } utf16le decode >array ] unit-test
|
||||
|
||||
[ { 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } utf16le encode >array ] unit-test
|
||||
|
||||
|
|
|
@ -101,13 +101,9 @@ M: utf16le encode-char ( char stream encoding -- )
|
|||
|
||||
! UTF-16
|
||||
|
||||
: bom-le B{ HEX: ff HEX: fe } ; inline
|
||||
CONSTANT: bom-le B{ HEX: ff HEX: fe }
|
||||
|
||||
: bom-be B{ HEX: fe HEX: ff } ; inline
|
||||
|
||||
: start-utf16le? ( seq1 -- seq2 ? ) bom-le ?head ;
|
||||
|
||||
: start-utf16be? ( seq1 -- seq2 ? ) bom-be ?head ;
|
||||
CONSTANT: bom-be B{ HEX: fe HEX: ff }
|
||||
|
||||
: bom>le/be ( bom -- le/be )
|
||||
dup bom-le sequence= [ drop utf16le ] [
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Daniel Ehrenberg
|
|
@ -0,0 +1 @@
|
|||
UTF32 encoding/decoding
|
|
@ -0,0 +1 @@
|
|||
text
|
|
@ -0,0 +1,27 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.markup help.syntax io.encodings strings ;
|
||||
IN: io.encodings.utf32
|
||||
|
||||
ARTICLE: "io.encodings.utf32" "UTF-32 encoding"
|
||||
"The UTF-32 encoding is a fixed-width encoding. Unicode code points are encoded as 4 byte sequences. There are three encoding descriptor classes for working with UTF-32, depending on endianness or the presence of a BOM:"
|
||||
{ $subsection utf32 }
|
||||
{ $subsection utf32le }
|
||||
{ $subsection utf32be } ;
|
||||
|
||||
ABOUT: "io.encodings.utf32"
|
||||
|
||||
HELP: utf32le
|
||||
{ $class-description "The encoding descriptor for UTF-32LE, that is, UTF-32 in little endian, without a byte order mark. Streams can be made which read or write with this encoding." }
|
||||
{ $see-also "encodings-introduction" } ;
|
||||
|
||||
HELP: utf32be
|
||||
{ $class-description "The encoding descriptor for UTF-32BE, that is, UTF-32 in big endian, without a byte order mark. Streams can be made which read or write with this encoding." }
|
||||
{ $see-also "encodings-introduction" } ;
|
||||
|
||||
HELP: utf32
|
||||
{ $class-description "The encoding descriptor for UTF-32, that is, UTF-32 with a byte order mark. This is the most useful for general input and output in UTF-32. Streams can be made which read or write with this encoding." }
|
||||
{ $see-also "encodings-introduction" } ;
|
||||
|
||||
{ utf32 utf32le utf32be } related-words
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel tools.test io.encodings.utf32 arrays sbufs
|
||||
io.streams.byte-array sequences io.encodings io
|
||||
io.encodings.string alien.c-types alien.strings accessors classes ;
|
||||
IN: io.encodings.utf32.tests
|
||||
|
||||
[ { CHAR: x } ] [ { 0 0 0 CHAR: x } utf32be decode >array ] unit-test
|
||||
[ { HEX: 1D11E } ] [ { 0 1 HEX: D1 HEX: 1E } utf32be decode >array ] unit-test
|
||||
[ { CHAR: replacement-character } ] [ { 0 1 HEX: D1 } utf32be decode >array ] unit-test
|
||||
[ { CHAR: replacement-character } ] [ { 0 1 } utf32be decode >array ] unit-test
|
||||
[ { CHAR: replacement-character } ] [ { 0 } utf32be decode >array ] unit-test
|
||||
[ { } ] [ { } utf32be decode >array ] unit-test
|
||||
|
||||
[ { 0 0 0 CHAR: x 0 1 HEX: D1 HEX: 1E } ] [ { CHAR: x HEX: 1d11e } utf32be encode >array ] unit-test
|
||||
|
||||
[ { CHAR: x } ] [ { CHAR: x 0 0 0 } utf32le decode >array ] unit-test
|
||||
[ { HEX: 1d11e } ] [ { HEX: 1e HEX: d1 1 0 } utf32le decode >array ] unit-test
|
||||
[ { CHAR: replacement-character } ] [ { HEX: 1e HEX: d1 1 } utf32le decode >array ] unit-test
|
||||
[ { CHAR: replacement-character } ] [ { HEX: 1e HEX: d1 } utf32le decode >array ] unit-test
|
||||
[ { CHAR: replacement-character } ] [ { HEX: 1e } utf32le decode >array ] unit-test
|
||||
[ { } ] [ { } utf32le decode >array ] unit-test
|
||||
|
||||
[ { 120 0 0 0 HEX: 1e HEX: d1 1 0 } ] [ { CHAR: x HEX: 1d11e } utf32le encode >array ] unit-test
|
||||
|
||||
[ { CHAR: x } ] [ { HEX: ff HEX: fe 0 0 CHAR: x 0 0 0 } utf32 decode >array ] unit-test
|
||||
[ { CHAR: x } ] [ { 0 0 HEX: fe HEX: ff 0 0 0 CHAR: x } utf32 decode >array ] unit-test
|
||||
|
||||
[ { HEX: ff HEX: fe 0 0 120 0 0 0 HEX: 1e HEX: d1 1 0 } ] [ { CHAR: x HEX: 1d11e } utf32 encode >array ] unit-test
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: math kernel io.encodings combinators io io.encodings.utf16
|
||||
sequences io.binary ;
|
||||
IN: io.encodings.utf32
|
||||
|
||||
SINGLETON: utf32be
|
||||
|
||||
SINGLETON: utf32le
|
||||
|
||||
SINGLETON: utf32
|
||||
|
||||
<PRIVATE
|
||||
|
||||
! Decoding
|
||||
|
||||
! Shared decoding helper. Reads up to 4 bytes from the stream and
! dispatches on how many were actually read:
!   0 bytes     -> f
!   4 bytes     -> the result of calling quot on the bytes
!   otherwise   -> replacement-char
! The encoding argument is dropped without being used.
: char> ( stream encoding quot -- ch )
|
||||
nip swap 4 swap stream-read dup length {
|
||||
{ 0 [ 2drop f ] }
|
||||
{ 4 [ swap call ] }
|
||||
[ 3drop replacement-char ]
|
||||
} case ; inline
|
||||
|
||||
! Decodes one code point by reading 4 bytes via char> and interpreting
! them as a big-endian number (be>).
M: utf32be decode-char
|
||||
[ be> ] char> ;
|
||||
|
||||
! Decodes one code point by reading 4 bytes via char> and interpreting
! them as a little-endian number (le>).
M: utf32le decode-char
|
||||
[ le> ] char> ;
|
||||
|
||||
! Encoding
|
||||
|
||||
! Shared encoding helper. The width 4 is curried onto quot, the resulting
! quotation is applied to char underneath the stream (dip), and the bytes
! it produces are written to the stream.
! The encoding argument is dropped without being used.
: >char ( char stream encoding quot -- )
|
||||
nip 4 swap curry dip stream-write ; inline
|
||||
|
||||
! Writes one code point as 4 bytes in big-endian order (>be) via >char.
M: utf32be encode-char
|
||||
[ >be ] >char ;
|
||||
|
||||
! Writes one code point as 4 bytes in little-endian order (>le) via >char.
M: utf32le encode-char
|
||||
[ >le ] >char ;
|
||||
|
||||
! UTF-32
|
||||
|
||||
! 4-byte byte order marks compared against in bom>le/be:
! FF FE 00 00 selects utf32le, 00 00 FE FF selects utf32be.
CONSTANT: bom-le B{ HEX: ff HEX: fe 0 0 }
|
||||
|
||||
CONSTANT: bom-be B{ 0 0 HEX: fe HEX: ff }
|
||||
|
||||
! Maps a byte order mark to a concrete encoding: utf32le when it equals
! bom-le, utf32be when it equals bom-be. For anything else missing-bom is
! called (not defined in this file; presumably provided by
! io.encodings.utf16, which is in the USING: list -- confirm).
: bom>le/be ( bom -- le/be )
|
||||
dup bom-le sequence= [ drop utf32le ] [
|
||||
bom-be sequence= [ utf32be ] [ missing-bom ] if
|
||||
] if ;
|
||||
|
||||
! Creates a decoder for BOM-prefixed UTF-32: reads 4 bytes from the
! stream, picks utf32le or utf32be from them with bom>le/be, and builds
! the decoder for that concrete encoding.
M: utf32 <decoder> ( stream utf32 -- decoder )
|
||||
drop 4 over stream-read bom>le/be <decoder> ;
|
||||
|
||||
! Creates an encoder for BOM-prefixed UTF-32: writes the little-endian BOM
! to the stream and then builds a utf32le encoder, so output through this
! descriptor is always little-endian.
M: utf32 <encoder> ( stream utf32 -- encoder )
|
||||
drop bom-le over stream-write utf32le <encoder> ;
|
|
@ -10,3 +10,7 @@ IN: io.binary.tests
|
|||
[ 1234 ] [ 1234 4 >le le> ] unit-test
|
||||
|
||||
[ fixnum ] [ B{ 0 0 0 0 0 0 0 0 0 0 } be> class ] unit-test
|
||||
|
||||
[ HEX: 56780000 HEX: 12340000 ] [ HEX: 1234000056780000 d>w/w ] unit-test
|
||||
[ HEX: 5678 HEX: 1234 ] [ HEX: 12345678 w>h/h ] unit-test
|
||||
[ HEX: 34 HEX: 12 ] [ HEX: 1234 h>b/b ] unit-test
|
||||
|
|
|
@ -14,13 +14,13 @@ IN: io.binary
|
|||
: >be ( x n -- byte-array ) >le dup reverse-here ;
|
||||
|
||||
: d>w/w ( d -- w1 w2 )
|
||||
dup HEX: ffffffff bitand
|
||||
swap -32 shift HEX: ffffffff bitand ;
|
||||
[ HEX: ffffffff bitand ]
|
||||
[ -32 shift HEX: ffffffff bitand ] bi ;
|
||||
|
||||
: w>h/h ( w -- h1 h2 )
|
||||
dup HEX: ffff bitand
|
||||
swap -16 shift HEX: ffff bitand ;
|
||||
[ HEX: ffff bitand ]
|
||||
[ -16 shift HEX: ffff bitand ] bi ;
|
||||
|
||||
: h>b/b ( h -- b1 b2 )
|
||||
dup mask-byte
|
||||
swap -8 shift mask-byte ;
|
||||
[ mask-byte ]
|
||||
[ -8 shift mask-byte ] bi ;
|
||||
|
|
|
@ -78,6 +78,7 @@ ARTICLE: "encodings-descriptors" "Encoding descriptors"
|
|||
{ $subsection "io.encodings.binary" }
|
||||
{ $subsection "io.encodings.utf8" }
|
||||
{ $subsection "io.encodings.utf16" }
|
||||
{ $vocab-subsection "UTF-32 encoding" "io.encodings.utf32" }
|
||||
{ $vocab-subsection "Strict encodings" "io.encodings.strict" }
|
||||
"Legacy encodings:"
|
||||
{ $vocab-subsection "8-bit encodings" "io.encodings.8-bit" }
|
||||
|
|
Loading…
Reference in New Issue