Merge branch 'master' into new_ui

db4
Slava Pestov 2009-02-02 00:22:48 -06:00
commit 32bde32018
24 changed files with 17327 additions and 19 deletions

View File

@ -16,13 +16,22 @@ HELP: once-at
{ $values { "value" object } { "key" object } { "assoc" assoc } }
{ $description "If the assoc does not contain the given key, adds the key/value pair to the assoc, otherwise does nothing." } ;
HELP: >biassoc
{ $values { "assoc" assoc } { "biassoc" biassoc } }
{ $description "Costructs a new biassoc with the same key/value pairs as the given assoc." } ;
ARTICLE: "biassocs" "Bidirectional assocs"
"A " { $emphasis "bidirectional assoc" } " combines a pair of assocs to form a data structure where both normal assoc opeartions (eg, " { $link at } "), as well as " { $link "assocs-values" } " (eg, " { $link value-at } ") run in sub-linear time."
$nl
"Bidirectional assocs implement the entire assoc protocol with the exception of " { $link delete-at } ". Duplicate values are allowed, however value lookups with " { $link value-at } " only return the first key that a given value was stored with."
"Bidirectional assocs implement the entire " { $link "assoc-protocol" } " with the exception of " { $link delete-at } ". Duplicate values are allowed, however value lookups with " { $link value-at } " only return the first key that a given value was stored with."
$nl
"The class of biassocs:"
{ $subsection biassoc }
{ $subsection biassoc? }
"Creating new biassocs:"
{ $subsection <biassoc> }
{ $subsection <bihash> } ;
{ $subsection <bihash> }
"Converting existing assocs to biassocs:"
{ $subsection >biassoc } ;
ABOUT: "biassocs"

View File

@ -20,3 +20,13 @@ USING: biassocs assocs namespaces tools.test ;
[ 2 ] [ 1 "h" get value-at ] unit-test
[ 2 ] [ "h" get assoc-size ] unit-test
H{ { "a" "A" } { "b" "B" } } "a" set
[ ] [ "a" get >biassoc "b" set ] unit-test
[ t ] [ "b" get biassoc? ] unit-test
[ "A" ] [ "a" "b" get at ] unit-test
[ "a" ] [ "A" "b" get value-at ] unit-test

View File

@ -1,6 +1,6 @@
! Copyright (C) 2008 Slava Pestov.
! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel assocs accessors summary ;
USING: kernel assocs accessors summary hashtables ;
IN: biassocs
TUPLE: biassoc from to ;
@ -37,4 +37,10 @@ M: biassoc >alist
M: biassoc clear-assoc
[ from>> clear-assoc ] [ to>> clear-assoc ] bi ;
M: biassoc new-assoc
drop [ <hashtable> ] [ <hashtable> ] bi biassoc boa ;
INSTANCE: biassoc assoc
: >biassoc ( assoc -- biassoc )
T{ biassoc } assoc-clone-like ;

View File

@ -45,7 +45,7 @@ IN: io.encodings.8-bit
: ch>byte ( assoc -- newassoc )
[ swap ] assoc-map >hashtable ;
: parse-file ( path -- byte>ch ch>byte )
: parse-file ( stream -- byte>ch ch>byte )
lines process-contents
[ byte>ch ] [ ch>byte ] bi ;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
Daniel Ehrenberg

View File

@ -0,0 +1,19 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax ;
IN: io.encodings.japanese
ARTICLE: "io.encodings.japanese" "Japanese text encodings"
"Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete."
{ $subsection shift-jis }
{ $subsection windows-31j } ;
ABOUT: "io.encodings.japanese"
HELP: windows-31j
{ $class-description "The encoding descriptor Windows-31J, which is sometimes informally called Shift JIS. This is based on Code Page 932." }
{ $see-also "encodings-introduction" shift-jis } ;
HELP: shift-jis
{ $class-description "The encoding descriptor for Shift JIS, or JIS X 208:1997 Appendix 1. Microsoft extensions are not included." }
{ $see-also "encodings-introduction" windows-31j } ;

View File

@ -0,0 +1,17 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: io.encodings.japanese tools.test io.encodings.string arrays strings ;
IN: io.encodings.japanese.tests
[ { CHAR: replacement-character } ] [ { 141 } shift-jis decode >array ] unit-test
[ "" ] [ "" shift-jis decode >string ] unit-test
[ "" ] [ "" shift-jis encode >string ] unit-test
[ { CHAR: replacement-character } shift-jis encode ] must-fail
[ "ab¥ィ" ] [ { CHAR: a CHAR: b HEX: 5C HEX: A8 } shift-jis decode ] unit-test
[ { CHAR: a CHAR: b HEX: 5C HEX: A8 } ] [ "ab¥ィ" shift-jis encode >array ] unit-test
[ "ab\\ィ" ] [ { CHAR: a CHAR: b HEX: 5C HEX: A8 } windows-31j decode ] unit-test
[ { CHAR: a CHAR: b HEX: 5C HEX: A8 } ] [ "ab\\ィ" windows-31j encode >array ] unit-test
[ "\u000081\u0000c8" ] [ CHAR: logical-and 1string windows-31j encode >string ] unit-test
[ "\u000081\u0000c8" ] [ CHAR: logical-and 1string shift-jis encode >string ] unit-test
[ { CHAR: logical-and } ] [ "\u000081\u0000c8" windows-31j decode >array ] unit-test
[ { CHAR: logical-and } ] [ "\u000081\u0000c8" shift-jis decode >array ] unit-test

View File

@ -0,0 +1,61 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: sequences kernel io io.files combinators.short-circuit
math.order values assocs io.encodings io.binary fry strings
math io.encodings.ascii arrays accessors splitting math.parser
biassocs ;
IN: io.encodings.japanese
VALUE: shift-jis
VALUE: windows-31j
<PRIVATE
TUPLE: jis assoc ;
: <jis> ( assoc -- jis )
[ nip ] assoc-filter H{ } assoc-like
>biassoc jis boa ;
: ch>jis ( ch tuple -- jis ) assoc>> value-at [ encode-error ] unless* ;
: jis>ch ( jis tuple -- string ) assoc>> at replacement-char or ;
: process-jis ( lines -- assoc )
[ "#" split1 drop ] map harvest [
"\t" split 2 head
[ 2 short tail hex> ] map
] map ;
: make-jis ( filename -- jis )
ascii file-lines process-jis <jis> ;
"resource:basis/io/encodings/japanese/CP932.txt"
make-jis to: windows-31j
"resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt"
make-jis to: shift-jis
: small? ( char -- ? )
! ASCII range or single-byte halfwidth katakana
{ [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ;
: write-halfword ( stream halfword -- )
h>b/b swap B{ } 2sequence swap stream-write ;
M: jis encode-char
swapd ch>jis
dup small?
[ swap stream-write1 ]
[ write-halfword ] if ;
M: jis decode-char
swap dup stream-read1 [
dup small? [ nip swap jis>ch ] [
swap stream-read1
[ 2array be> swap jis>ch ]
[ 2drop replacement-char ] if*
] if
] [ 2drop f ] if* ;
PRIVATE>

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
Japanese text encodings

View File

@ -0,0 +1 @@
text

View File

@ -1,3 +1,5 @@
! Copyright (C) 2008 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax io.encodings strings ;
IN: io.encodings.utf16

View File

@ -1,6 +1,7 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel tools.test io.encodings.utf16 arrays sbufs
io.streams.byte-array sequences io.encodings io
bootstrap.unicode
io.encodings.string alien.c-types alien.strings accessors classes ;
IN: io.encodings.utf16.tests
@ -15,7 +16,6 @@ IN: io.encodings.utf16.tests
[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } utf16le decode >array ] unit-test
[ { CHAR: replacement-character } ] [ { 0 BIN: 11011111 } utf16le decode >array ] unit-test
[ { CHAR: replacement-character } ] [ { 0 BIN: 11011011 0 0 } utf16le decode >array ] unit-test
[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } utf16le decode >array ] unit-test
[ { 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } utf16le encode >array ] unit-test

View File

@ -101,13 +101,9 @@ M: utf16le encode-char ( char stream encoding -- )
! UTF-16
: bom-le B{ HEX: ff HEX: fe } ; inline
CONSTANT: bom-le B{ HEX: ff HEX: fe }
: bom-be B{ HEX: fe HEX: ff } ; inline
: start-utf16le? ( seq1 -- seq2 ? ) bom-le ?head ;
: start-utf16be? ( seq1 -- seq2 ? ) bom-be ?head ;
CONSTANT: bom-be B{ HEX: fe HEX: ff }
: bom>le/be ( bom -- le/be )
dup bom-le sequence= [ drop utf16le ] [

View File

@ -0,0 +1 @@
Daniel Ehrenberg

View File

@ -0,0 +1 @@
UTF32 encoding/decoding

View File

@ -0,0 +1 @@
text

View File

@ -0,0 +1,27 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax io.encodings strings ;
IN: io.encodings.utf32
ARTICLE: "io.encodings.utf32" "UTF-32 encoding"
"The UTF-32 encoding is a fixed-width encoding. Unicode code points are encoded as 4 byte sequences. There are three encoding descriptor classes for working with UTF-32, depending on endianness or the presence of a BOM:"
{ $subsection utf32 }
{ $subsection utf32le }
{ $subsection utf32be } ;
ABOUT: "io.encodings.utf32"
HELP: utf32le
{ $class-description "The encoding descriptor for UTF-32LE, that is, UTF-32 in little endian, without a byte order mark. Streams can be made which read or write wth this encoding." }
{ $see-also "encodings-introduction" } ;
HELP: utf32be
{ $class-description "The encoding descriptor for UTF-32BE, that is, UTF-32 in big endian, without a byte order mark. Streams can be made which read or write wth this encoding." }
{ $see-also "encodings-introduction" } ;
HELP: utf32
{ $class-description "The encoding descriptor for UTF-32, that is, UTF-32 with a byte order mark. This is the most useful for general input and output in UTF-32. Streams can be made which read or write wth this encoding." }
{ $see-also "encodings-introduction" } ;
{ utf32 utf32le utf32be } related-words

View File

@ -0,0 +1,30 @@
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel tools.test io.encodings.utf32 arrays sbufs
io.streams.byte-array sequences io.encodings io
io.encodings.string alien.c-types alien.strings accessors classes ;
IN: io.encodings.utf32.tests
[ { CHAR: x } ] [ { 0 0 0 CHAR: x } utf32be decode >array ] unit-test
[ { HEX: 1D11E } ] [ { 0 1 HEX: D1 HEX: 1E } utf32be decode >array ] unit-test
[ { CHAR: replacement-character } ] [ { 0 1 HEX: D1 } utf32be decode >array ] unit-test
[ { CHAR: replacement-character } ] [ { 0 1 } utf32be decode >array ] unit-test
[ { CHAR: replacement-character } ] [ { 0 } utf32be decode >array ] unit-test
[ { } ] [ { } utf32be decode >array ] unit-test
[ { 0 0 0 CHAR: x 0 1 HEX: D1 HEX: 1E } ] [ { CHAR: x HEX: 1d11e } utf32be encode >array ] unit-test
[ { CHAR: x } ] [ { CHAR: x 0 0 0 } utf32le decode >array ] unit-test
[ { HEX: 1d11e } ] [ { HEX: 1e HEX: d1 1 0 } utf32le decode >array ] unit-test
[ { CHAR: replacement-character } ] [ { HEX: 1e HEX: d1 1 } utf32le decode >array ] unit-test
[ { CHAR: replacement-character } ] [ { HEX: 1e HEX: d1 } utf32le decode >array ] unit-test
[ { CHAR: replacement-character } ] [ { HEX: 1e } utf32le decode >array ] unit-test
[ { } ] [ { } utf32le decode >array ] unit-test
[ { 120 0 0 0 HEX: 1e HEX: d1 1 0 } ] [ { CHAR: x HEX: 1d11e } utf32le encode >array ] unit-test
[ { CHAR: x } ] [ { HEX: ff HEX: fe 0 0 CHAR: x 0 0 0 } utf32 decode >array ] unit-test
[ { CHAR: x } ] [ { 0 0 HEX: fe HEX: ff 0 0 0 CHAR: x } utf32 decode >array ] unit-test
[ { HEX: ff HEX: fe 0 0 120 0 0 0 HEX: 1e HEX: d1 1 0 } ] [ { CHAR: x HEX: 1d11e } utf32 encode >array ] unit-test

View File

@ -0,0 +1,56 @@
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: math kernel io.encodings combinators io io.encodings.utf16
sequences io.binary ;
IN: io.encodings.utf32
SINGLETON: utf32be
SINGLETON: utf32le
SINGLETON: utf32
<PRIVATE
! Decoding
: char> ( stream encoding quot -- ch )
nip swap 4 swap stream-read dup length {
{ 0 [ 2drop f ] }
{ 4 [ swap call ] }
[ 3drop replacement-char ]
} case ; inline
M: utf32be decode-char
[ be> ] char> ;
M: utf32le decode-char
[ le> ] char> ;
! Encoding
: >char ( char stream encoding quot -- )
nip 4 swap curry dip stream-write ; inline
M: utf32be encode-char
[ >be ] >char ;
M: utf32le encode-char
[ >le ] >char ;
! UTF-32
CONSTANT: bom-le B{ HEX: ff HEX: fe 0 0 }
CONSTANT: bom-be B{ 0 0 HEX: fe HEX: ff }
: bom>le/be ( bom -- le/be )
dup bom-le sequence= [ drop utf32le ] [
bom-be sequence= [ utf32be ] [ missing-bom ] if
] if ;
M: utf32 <decoder> ( stream utf32 -- decoder )
drop 4 over stream-read bom>le/be <decoder> ;
M: utf32 <encoder> ( stream utf32 -- encoder )
drop bom-le over stream-write utf32le <encoder> ;

View File

@ -10,3 +10,7 @@ IN: io.binary.tests
[ 1234 ] [ 1234 4 >le le> ] unit-test
[ fixnum ] [ B{ 0 0 0 0 0 0 0 0 0 0 } be> class ] unit-test
[ HEX: 56780000 HEX: 12340000 ] [ HEX: 1234000056780000 d>w/w ] unit-test
[ HEX: 5678 HEX: 1234 ] [ HEX: 12345678 w>h/h ] unit-test
[ HEX: 34 HEX: 12 ] [ HEX: 1234 h>b/b ] unit-test

View File

@ -14,13 +14,13 @@ IN: io.binary
: >be ( x n -- byte-array ) >le dup reverse-here ;
: d>w/w ( d -- w1 w2 )
dup HEX: ffffffff bitand
swap -32 shift HEX: ffffffff bitand ;
[ HEX: ffffffff bitand ]
[ -32 shift HEX: ffffffff bitand ] bi ;
: w>h/h ( w -- h1 h2 )
dup HEX: ffff bitand
swap -16 shift HEX: ffff bitand ;
[ HEX: ffff bitand ]
[ -16 shift HEX: ffff bitand ] bi ;
: h>b/b ( h -- b1 b2 )
dup mask-byte
swap -8 shift mask-byte ;
[ mask-byte ]
[ -8 shift mask-byte ] bi ;

View File

@ -78,6 +78,7 @@ ARTICLE: "encodings-descriptors" "Encoding descriptors"
{ $subsection "io.encodings.binary" }
{ $subsection "io.encodings.utf8" }
{ $subsection "io.encodings.utf16" }
{ $vocab-subsection "UTF-32 encoding" "io.encodings.utf32" }
{ $vocab-subsection "Strict encodings" "io.encodings.strict" }
"Legacy encodings:"
{ $vocab-subsection "8-bit encodings" "io.encodings.8-bit" }