Merge branch 'master' of git://factorcode.org/git/factor
commit
71f729499d
|
@ -1,5 +1,5 @@
|
|||
USING: images.bitmap images.viewer io.encodings.binary
|
||||
io.files io.files.unique kernel tools.test ;
|
||||
io.files io.files.unique kernel tools.test images.loader ;
|
||||
IN: images.bitmap.tests
|
||||
|
||||
: test-bitmap24 ( -- path )
|
||||
|
@ -17,7 +17,7 @@ IN: images.bitmap.tests
|
|||
[ t ]
|
||||
[
|
||||
test-bitmap24
|
||||
[ binary file-contents ] [ load-bitmap ] bi
|
||||
[ binary file-contents ] [ load-image ] bi
|
||||
|
||||
"test-bitmap24" unique-file
|
||||
[ save-bitmap ] [ binary file-contents ] bi =
|
||||
|
|
|
@ -6,15 +6,20 @@ kernel macros math math.bitwise math.functions namespaces sequences
|
|||
strings images endian summary ;
|
||||
IN: images.bitmap
|
||||
|
||||
TUPLE: bitmap-image < image ;
|
||||
TUPLE: bitmap-image < image
|
||||
magic size reserved offset header-length width
|
||||
height planes bit-count compression size-image
|
||||
x-pels y-pels color-used color-important rgb-quads color-index ;
|
||||
|
||||
! Currently can only handle 24/32bit bitmaps.
|
||||
! Handles row-reversed bitmaps (their height is negative)
|
||||
|
||||
TUPLE: bitmap magic size reserved offset header-length width
|
||||
height planes bit-count compression size-image
|
||||
x-pels y-pels color-used color-important rgb-quads color-index
|
||||
buffer ;
|
||||
ERROR: bitmap-magic magic ;
|
||||
|
||||
M: bitmap-magic summary
|
||||
drop "First two bytes of bitmap stream must be 'BM'" ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: array-copy ( bitmap array -- bitmap array' )
|
||||
over size-image>> abs memory>byte-array ;
|
||||
|
@ -37,16 +42,11 @@ ERROR: bmp-not-supported n ;
|
|||
{ 1 [ bmp-not-supported ] }
|
||||
} case >byte-array ;
|
||||
|
||||
ERROR: bitmap-magic ;
|
||||
|
||||
M: bitmap-magic summary
|
||||
drop "First two bytes of bitmap stream must be 'BM'" ;
|
||||
|
||||
: read2 ( -- n ) 2 read le> ;
|
||||
: read4 ( -- n ) 4 read le> ;
|
||||
|
||||
: parse-file-header ( bitmap -- bitmap )
|
||||
2 read >string dup "BM" = [ bitmap-magic ] unless >>magic
|
||||
2 read dup "BM" sequence= [ bitmap-magic ] unless >>magic
|
||||
read4 >>size
|
||||
read4 >>reserved
|
||||
read4 >>offset ;
|
||||
|
@ -79,17 +79,13 @@ M: bitmap-magic summary
|
|||
dup rgb-quads-length read >>rgb-quads
|
||||
dup color-index-length read >>color-index ;
|
||||
|
||||
: load-bitmap-data ( path -- bitmap )
|
||||
binary [
|
||||
bitmap new
|
||||
parse-file-header parse-bitmap-header parse-bitmap
|
||||
: load-bitmap-data ( path bitmap -- bitmap )
|
||||
[ binary ] dip '[
|
||||
_ parse-file-header parse-bitmap-header parse-bitmap
|
||||
] with-file-reader ;
|
||||
|
||||
: process-bitmap-data ( bitmap -- bitmap )
|
||||
dup raw-bitmap>buffer >>buffer ;
|
||||
|
||||
: load-bitmap ( path -- bitmap )
|
||||
load-bitmap-data process-bitmap-data ;
|
||||
dup raw-bitmap>buffer >>bitmap ;
|
||||
|
||||
ERROR: unknown-component-order bitmap ;
|
||||
|
||||
|
@ -101,15 +97,16 @@ ERROR: unknown-component-order bitmap ;
|
|||
[ unknown-component-order ]
|
||||
} case ;
|
||||
|
||||
: >image ( bitmap -- bitmap-image )
|
||||
{
|
||||
[ [ width>> ] [ height>> ] bi 2array ]
|
||||
[ bitmap>component-order ]
|
||||
[ buffer>> ]
|
||||
} cleave bitmap-image boa ;
|
||||
: fill-image-slots ( bitmap -- bitmap )
|
||||
dup {
|
||||
[ [ width>> ] [ height>> ] bi 2array >>dim ]
|
||||
[ bitmap>component-order >>component-order ]
|
||||
[ bitmap>> >>bitmap ]
|
||||
} cleave ;
|
||||
|
||||
M: bitmap-image load-image* ( path bitmap -- bitmap-image )
|
||||
drop load-bitmap >image ;
|
||||
M: bitmap-image load-image* ( path bitmap -- bitmap )
|
||||
load-bitmap-data process-bitmap-data
|
||||
fill-image-slots ;
|
||||
|
||||
M: bitmap-image normalize-scan-line-order
|
||||
dup dim>> '[
|
||||
|
@ -118,12 +115,12 @@ M: bitmap-image normalize-scan-line-order
|
|||
|
||||
MACRO: (nbits>bitmap) ( bits -- )
|
||||
[ -3 shift ] keep '[
|
||||
bitmap new
|
||||
bitmap-image new
|
||||
2over * _ * >>size-image
|
||||
swap >>height
|
||||
swap >>width
|
||||
swap array-copy [ >>buffer ] [ >>color-index ] bi
|
||||
_ >>bit-count >image
|
||||
swap array-copy [ >>bitmap ] [ >>color-index ] bi
|
||||
_ >>bit-count fill-image-slots
|
||||
] ;
|
||||
|
||||
: bgr>bitmap ( array height width -- bitmap )
|
||||
|
@ -135,11 +132,13 @@ MACRO: (nbits>bitmap) ( bits -- )
|
|||
: write2 ( n -- ) 2 >le write ;
|
||||
: write4 ( n -- ) 4 >le write ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: save-bitmap ( bitmap path -- )
|
||||
binary [
|
||||
B{ CHAR: B CHAR: M } write
|
||||
[
|
||||
buffer>> length 14 + 40 + write4
|
||||
color-index>> length 14 + 40 + write4
|
||||
0 write4
|
||||
54 write4
|
||||
40 write4
|
||||
|
|
|
@ -68,7 +68,7 @@ GENERIC: load-image* ( path tuple -- image )
|
|||
[
|
||||
3 <sliced-groups>
|
||||
[ [ 3 head-slice reverse-here ] each ]
|
||||
[ add-dummy-alpha ] bi
|
||||
[ [ 255 suffix ] map ] bi concat
|
||||
] change-bitmap
|
||||
] }
|
||||
} case
|
||||
|
@ -81,4 +81,4 @@ M: image normalize-scan-line-order ;
|
|||
: normalize-image ( image -- image )
|
||||
[ >byte-array ] change-bitmap
|
||||
normalize-component-order
|
||||
normalize-scan-line-order ;
|
||||
normalize-scan-line-order ;
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Doug Coleman
|
|
@ -0,0 +1,7 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: tools.test images.png ;
|
||||
IN: images.png.tests
|
||||
|
||||
: png-test-path ( -- path )
|
||||
"vocab:images/test-images/rgb.png" ;
|
|
@ -0,0 +1,41 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors constructors images io io.binary io.encodings.ascii
|
||||
io.encodings.binary io.encodings.string io.files io.files.info kernel
|
||||
sequences io.streams.limited ;
|
||||
IN: images.png
|
||||
|
||||
TUPLE: png-image < image chunks ;
|
||||
|
||||
CONSTRUCTOR: png-image ( -- image )
|
||||
V{ } clone >>chunks ;
|
||||
|
||||
TUPLE: png-chunk length type data crc ;
|
||||
|
||||
CONSTRUCTOR: png-chunk ( -- png-chunk ) ;
|
||||
|
||||
CONSTANT: png-header B{ HEX: 89 HEX: 50 HEX: 4e HEX: 47 HEX: 0d HEX: 0a HEX: 1a HEX: 0a }
|
||||
|
||||
ERROR: bad-png-header header ;
|
||||
|
||||
: read-png-header ( -- )
|
||||
8 read dup png-header sequence= [
|
||||
bad-png-header
|
||||
] unless drop ;
|
||||
|
||||
: read-png-chunks ( image -- image )
|
||||
<png-chunk>
|
||||
4 read be> >>length
|
||||
4 read ascii decode >>type
|
||||
dup length>> read >>data
|
||||
4 read >>crc
|
||||
[ over chunks>> push ]
|
||||
[ type>> ] bi "IEND" =
|
||||
[ read-png-chunks ] unless ;
|
||||
|
||||
: load-png ( path -- image )
|
||||
[ binary <file-reader> ] [ file-info size>> ] bi stream-throws <limited-stream> [
|
||||
<png-image>
|
||||
read-png-header
|
||||
read-png-chunks
|
||||
] with-input-stream ;
|
Binary file not shown.
After Width: | Height: | Size: 4.2 KiB |
|
@ -4,7 +4,7 @@ USING: math.parser arrays io.encodings sequences kernel assocs
|
|||
hashtables io.encodings.ascii generic parser classes.tuple words
|
||||
words.symbol io io.files splitting namespaces math
|
||||
compiler.units accessors classes.singleton classes.mixin
|
||||
io.encodings.iana fry ;
|
||||
io.encodings.iana fry simple-flat-file ;
|
||||
IN: io.encodings.8-bit
|
||||
|
||||
<PRIVATE
|
||||
|
@ -35,36 +35,22 @@ CONSTANT: mappings {
|
|||
: encoding-file ( file-name -- stream )
|
||||
"vocab:io/encodings/8-bit/" ".TXT" surround ;
|
||||
|
||||
: process-contents ( lines -- assoc )
|
||||
[ "#" split1 drop ] map harvest
|
||||
[ "\t" split 2 head [ 2 short tail hex> ] map ] map ;
|
||||
|
||||
: byte>ch ( assoc -- array )
|
||||
256 replacement-char <array>
|
||||
[ '[ swap _ set-nth ] assoc-each ] keep ;
|
||||
|
||||
: ch>byte ( assoc -- newassoc )
|
||||
[ swap ] assoc-map >hashtable ;
|
||||
|
||||
: parse-file ( path -- byte>ch ch>byte )
|
||||
ascii file-lines process-contents
|
||||
[ byte>ch ] [ ch>byte ] bi ;
|
||||
|
||||
SYMBOL: 8-bit-encodings
|
||||
|
||||
TUPLE: 8-bit decode encode ;
|
||||
TUPLE: 8-bit biassoc ;
|
||||
|
||||
: encode-8-bit ( char stream assoc -- )
|
||||
swapd at*
|
||||
[ swap stream-write1 ] [ nip encode-error ] if ; inline
|
||||
swapd value-at
|
||||
[ swap stream-write1 ] [ encode-error ] if* ; inline
|
||||
|
||||
M: 8-bit encode-char encode>> encode-8-bit ;
|
||||
M: 8-bit encode-char biassoc>> encode-8-bit ;
|
||||
|
||||
: decode-8-bit ( stream array -- char/f )
|
||||
swap stream-read1 dup
|
||||
[ swap nth [ replacement-char ] unless* ] [ 2drop f ] if ; inline
|
||||
: decode-8-bit ( stream assoc -- char/f )
|
||||
swap stream-read1
|
||||
[ swap at [ replacement-char ] unless* ]
|
||||
[ drop f ] if* ; inline
|
||||
|
||||
M: 8-bit decode-char decode>> decode-8-bit ;
|
||||
M: 8-bit decode-char biassoc>> decode-8-bit ;
|
||||
|
||||
MIXIN: 8-bit-encoding
|
||||
|
||||
|
@ -87,7 +73,7 @@ PRIVATE>
|
|||
first3
|
||||
[ create-encoding ]
|
||||
[ dupd register-encoding ]
|
||||
[ encoding-file parse-file 8-bit boa ]
|
||||
[ encoding-file flat-file>biassoc 8-bit boa ]
|
||||
tri*
|
||||
] H{ } map>assoc
|
||||
8-bit-encodings set-global
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,9 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io.encodings.iana io.encodings.euc ;
|
||||
IN: io.encodings.big5
|
||||
|
||||
EUC: big5 "vocab:io/encodings/big5/CP950.txt"
|
||||
|
||||
big5 "Big5" register-encoding
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,8 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io.encodings.euc io.encodings.iana ;
|
||||
IN: io.encodings.euc-jp
|
||||
|
||||
EUC: euc-jp "vocab:io/encodings/euc-jp/euc-jp-2000-std.txt"
|
||||
|
||||
euc-jp "Extended_UNIX_Code_Packed_Format_for_Japanese" register-encoding
|
|
@ -0,0 +1 @@
|
|||
EUC-JP text encoding
|
|
@ -1,14 +1,10 @@
|
|||
! Copyright (C) 2009 Yun, Jonghyouk.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.syntax help.markup ;
|
||||
IN: io.encodings.korean
|
||||
IN: io.encodings.euc-kr
|
||||
|
||||
ARTICLE: "io.encodings.korean" "Korean text encodings"
|
||||
"The " { $vocab-link "io.encodings.korean" } " vocabulary implements encodings used for Korean text besides the standard UTF encodings for Unicode strings."
|
||||
{ $subsection cp949 } ;
|
||||
ABOUT: euc-kr
|
||||
|
||||
ABOUT: "io.encodings.korean"
|
||||
|
||||
HELP: cp949
|
||||
{ $class-description "This encoding class implements Microsoft's code page #949 encoding, also called Unified Hangul Code or ks_c_5601-1987, UHC. CP949 is extended version of EUC-KR and downward-compatibility to EUC-KR. " }
|
||||
HELP: euc-kr
|
||||
{ $class-description "This encoding class implements Microsoft's code page #949 encoding, also called Unified Hangul Code or ks_c_5601-1987, UHC. CP949 is extended version of EUC-KR and downward-compatibility to EUC-KR, in practice." }
|
||||
{ $see-also "encodings-introduction" } ;
|
|
@ -0,0 +1,8 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io.encodings.iana io.encodings.euc ;
|
||||
IN: io.encodings.euc-kr
|
||||
|
||||
EUC: euc-kr "vocab:io/encodings/euc-kr/data/cp949.txt"
|
||||
|
||||
euc-kr "EUC-KR" register-encoding
|
|
@ -0,0 +1,50 @@
|
|||
! Copyright (C) 2009 Yun, Jonghyouk.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: arrays byte-arrays io io.encodings io.encodings.euc-kr assocs
|
||||
io.encodings.string io.streams.string io.encodings.euc.private words
|
||||
kernel locals multiline namespaces sequences strings tools.test ;
|
||||
IN: io.encodings.euc.tests
|
||||
|
||||
: euc-kr>unicode ( ch -- ch/f )
|
||||
euc-kr euc-table word-prop at ;
|
||||
|
||||
: unicode>euc-kr ( ch -- ch/f )
|
||||
euc-kr euc-table word-prop value-at ;
|
||||
|
||||
[ f ] [ HEX: 80 euc-kr>unicode ] unit-test
|
||||
[ f ] [ HEX: ff euc-kr>unicode ] unit-test
|
||||
[ HEX: ac02 ] [ HEX: 8141 euc-kr>unicode ] unit-test
|
||||
[ HEX: 7f ] [ HEX: 7f euc-kr>unicode ] unit-test
|
||||
[ HEX: c724 ] [ HEX: c0b1 euc-kr>unicode ] unit-test
|
||||
|
||||
[ HEX: 8141 ] [ HEX: ac02 unicode>euc-kr ] unit-test
|
||||
[ HEX: 7f ] [ HEX: 7f unicode>euc-kr ] unit-test
|
||||
[ HEX: c0b1 ] [ HEX: c724 unicode>euc-kr ] unit-test
|
||||
|
||||
: phrase-unicode ( -- s )
|
||||
"\u00b3d9\u00d574\u00bb3c\u00acfc \u00bc31\u00b450\u00c0b0\u00c774!" ;
|
||||
|
||||
: phrase-euc-kr ( -- s )
|
||||
{
|
||||
HEX: b5 HEX: bf HEX: c7 HEX: d8
|
||||
HEX: b9 HEX: b0 HEX: b0 HEX: fa
|
||||
HEX: 20 HEX: b9 HEX: e9 HEX: b5
|
||||
HEX: ce HEX: bb HEX: ea HEX: c0
|
||||
HEX: cc HEX: 21
|
||||
} ;
|
||||
|
||||
: phrase-unicode>euc-kr ( -- s )
|
||||
phrase-unicode euc-kr encode ;
|
||||
|
||||
: phrase-euc-kr>unicode ( -- s )
|
||||
phrase-euc-kr euc-kr decode ;
|
||||
|
||||
[ t ] [ phrase-unicode>euc-kr >array phrase-euc-kr = ] unit-test
|
||||
|
||||
[ t ] [ phrase-euc-kr>unicode phrase-unicode = ] unit-test
|
||||
|
||||
[ t ] [ phrase-euc-kr 1 head* euc-kr decode phrase-unicode 1 head* = ] unit-test
|
||||
|
||||
[ t ] [ phrase-euc-kr 3 head* euc-kr decode phrase-unicode 2 head* = ] unit-test
|
||||
|
||||
[ t ] [ phrase-euc-kr 2 head* euc-kr decode phrase-unicode 2 head* CHAR: replacement-character suffix = ] unit-test
|
|
@ -0,0 +1,68 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg, Jonghyouk Yun.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel io.encodings accessors assocs sequences biassocs generic
|
||||
math.order simple-flat-file io io.binary byte-arrays locals combinators
|
||||
words classes.singleton fry classes.parser parser quotations ;
|
||||
IN: io.encodings.euc
|
||||
|
||||
TUPLE: euc { table biassoc } ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: byte? ( ch -- ? )
|
||||
HEX: 0 HEX: ff between? ;
|
||||
|
||||
M: euc encode-char ( char stream encoding -- )
|
||||
swapd table>> value-at [
|
||||
dup byte?
|
||||
[ swap stream-write1 ] [
|
||||
h>b/b swap 2byte-array
|
||||
swap stream-write
|
||||
] if
|
||||
] [ encode-error ] if* ;
|
||||
|
||||
: euc-multibyte? ( ch -- ? )
|
||||
HEX: 81 HEX: fe between? ;
|
||||
|
||||
:: decode-multibyte ( ch stream encoding -- char )
|
||||
stream stream-read1
|
||||
[ ch swap 2byte-array be> encoding table>> at ]
|
||||
[ replacement-char ] if* ;
|
||||
|
||||
M:: euc decode-char ( stream encoding -- char/f )
|
||||
stream stream-read1
|
||||
{
|
||||
{ [ dup not ] [ drop f ] }
|
||||
{ [ dup euc-multibyte? ] [ stream encoding decode-multibyte ] }
|
||||
[ encoding table>> at ]
|
||||
} cond ;
|
||||
|
||||
: define-method ( class word definition -- )
|
||||
[ create-method ] dip define ;
|
||||
|
||||
SYMBOL: euc-table
|
||||
|
||||
: setup-euc ( word file-name -- singleton-class biassoc )
|
||||
[ dup define-singleton-class ]
|
||||
[ flat-file>biassoc ] bi* ;
|
||||
|
||||
:: define-recursive-methods ( class data words -- )
|
||||
words [| word |
|
||||
class word [ drop data word execute ] define-method
|
||||
] each ;
|
||||
|
||||
: euc-methods ( singleton-class biassoc -- )
|
||||
[ euc-table set-word-prop ] [
|
||||
euc boa
|
||||
{ <encoder> <decoder> }
|
||||
define-recursive-methods
|
||||
] 2bi ;
|
||||
|
||||
: define-euc ( word file-name -- )
|
||||
setup-euc euc-methods ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: EUC:
|
||||
! EUC: euc-kr "vocab:io/encodings/korean/cp949.txt"
|
||||
CREATE-CLASS scan-object define-euc ; parsing
|
|
@ -0,0 +1 @@
|
|||
Daniel Ehrenberg
|
|
@ -1,13 +1,13 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.syntax help.markup ;
|
||||
IN: io.encodings.chinese
|
||||
IN: io.encodings.gb18030
|
||||
|
||||
ARTICLE: "io.encodings.chinese" "Chinese text encodings"
|
||||
"The " { $vocab-link "io.encodings.chinese" } " vocabulary implements encodings used for Chinese text besides the standard UTF encodings for Unicode strings."
|
||||
ARTICLE: "io.encodings.gb18030" "GB 18030"
|
||||
"The " { $vocab-link "io.encodings.gb18030" } " vocabulary implements GB18030, a commonly used encoding for Chinese text besides the standard UTF encodings for Unicode strings."
|
||||
{ $subsection gb18030 } ;
|
||||
|
||||
ABOUT: "io.encodings.chinese"
|
||||
ABOUT: "io.encodings.gb18030"
|
||||
|
||||
HELP: gb18030
|
||||
{ $class-description "The encoding descriptor for GB 18030, a Chinese national standard for text encoding. GB 18030 consists of a unique encoding for each Unicode code point, and for this reason has been described as a UTF. It is backwards compatible with the earlier encodings GB 2312 and GBK." }
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io.encodings.chinese io.encodings.string strings tools.test arrays ;
|
||||
IN: io.encodings.chinese.tests
|
||||
USING: io.encodings.gb18030 io.encodings.string strings tools.test arrays ;
|
||||
IN: io.encodings.gb18030.tests
|
||||
|
||||
[ "hello" ] [ "hello" gb18030 encode >string ] unit-test
|
||||
[ "hello" ] [ "hello" gb18030 decode ] unit-test
|
|
@ -4,7 +4,7 @@ USING: xml xml.data kernel io io.encodings interval-maps splitting fry
|
|||
math.parser sequences combinators assocs locals accessors math arrays
|
||||
byte-arrays values io.encodings.ascii ascii io.files biassocs
|
||||
math.order combinators.short-circuit io.binary io.encodings.iana ;
|
||||
IN: io.encodings.chinese
|
||||
IN: io.encodings.gb18030
|
||||
|
||||
SINGLETON: gb18030
|
||||
|
||||
|
@ -80,7 +80,7 @@ VALUE: gb>u
|
|||
VALUE: u>gb
|
||||
VALUE: mapping
|
||||
|
||||
"vocab:io/encodings/chinese/gb-18030-2000.xml"
|
||||
"vocab:io/encodings/gb18030/gb-18030-2000.xml"
|
||||
ascii <file-reader> xml>gb-data
|
||||
[ ranges-u>gb to: u>gb ] [ ranges-gb>u to: gb>u ] bi
|
||||
>biassoc to: mapping
|
|
@ -0,0 +1 @@
|
|||
Daniel Ehrenberg
|
|
@ -0,0 +1,9 @@
|
|||
! Copyright (C) 2009 Yun, Jonghyouk.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.syntax help.markup ;
|
||||
IN: io.encodings.johab
|
||||
|
||||
ABOUT: johab
|
||||
|
||||
HELP: johab
|
||||
{ $class-description "Korean Johab encoding (KSC5601-1992). This encoding is not commonly used anymore." } ;
|
|
@ -0,0 +1,36 @@
|
|||
! Copyright (C) 2009 Yun, Jonghyouk.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: arrays byte-arrays io io.encodings io.encodings.johab assocs
|
||||
io.encodings.string io.streams.string io.encodings.euc.private words
|
||||
kernel locals multiline namespaces sequences strings tools.test ;
|
||||
IN: io.encodings.johab.tests
|
||||
|
||||
: johab>unicode ( ch -- ch/f )
|
||||
johab euc-table word-prop at ;
|
||||
|
||||
: unicode>johab ( ch -- ch/f )
|
||||
johab euc-table word-prop value-at ;
|
||||
|
||||
! johab encodings
|
||||
[ HEX: 20 ] [ HEX: 20 johab>unicode ] unit-test
|
||||
[ HEX: 3133 ] [ HEX: 8444 johab>unicode ] unit-test
|
||||
[ HEX: 8A5D ] [ HEX: AD4F unicode>johab ] unit-test
|
||||
|
||||
: phrase-unicode ( -- s )
|
||||
"\u00b3d9\u00d574\u00bb3c\u00acfc \u00bc31\u00b450\u00c0b0\u00c774!" ;
|
||||
|
||||
: phrase-johab ( -- s )
|
||||
B{
|
||||
149 183 208 129 162 137 137 193 32 164 130 150 129 172 101
|
||||
183 161 33
|
||||
} ;
|
||||
|
||||
: phrase-johab>unicode ( -- s )
|
||||
phrase-johab johab decode ;
|
||||
|
||||
: phrase-unicode>johab ( -- s )
|
||||
phrase-unicode johab encode ;
|
||||
|
||||
[ t ] [ phrase-johab>unicode phrase-unicode = ] unit-test
|
||||
[ t ] [ phrase-unicode>johab phrase-johab = ] unit-test
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USE: io.encodings.euc
|
||||
IN: io.encodings.johab
|
||||
|
||||
EUC: johab "vocab:io/encodings/johab/johab.txt"
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1 @@
|
|||
Johab Korean text encoding
|
|
@ -1,46 +0,0 @@
|
|||
! Copyright (C) 2009 Yun, Jonghyouk.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: arrays byte-arrays io io.encodings io.encodings.korean
|
||||
io.encodings.korean.private io.encodings.string io.streams.string
|
||||
kernel locals multiline namespaces sequences strings tools.test ;
|
||||
IN: io.encodings.korean.tests
|
||||
|
||||
! convert cp949 <> unicode
|
||||
|
||||
[ f ] [ HEX: 80 cp949>unicode ] unit-test
|
||||
[ f ] [ HEX: ff cp949>unicode ] unit-test
|
||||
[ HEX: ac02 ] [ HEX: 8141 cp949>unicode ] unit-test
|
||||
[ HEX: 7f ] [ HEX: 7f cp949>unicode ] unit-test
|
||||
[ HEX: c724 ] [ HEX: c0b1 cp949>unicode ] unit-test
|
||||
|
||||
[ HEX: 8141 ] [ HEX: ac02 unicode>cp949 ] unit-test
|
||||
[ HEX: 7f ] [ HEX: 7f unicode>cp949 ] unit-test
|
||||
[ HEX: c0b1 ] [ HEX: c724 unicode>cp949 ] unit-test
|
||||
|
||||
: phrase-unicode ( -- s )
|
||||
"\u00b3d9\u00d574\u00bb3c\u00acfc \u00bc31\u00b450\u00c0b0\u00c774!" ;
|
||||
|
||||
: phrase-cp949 ( -- s )
|
||||
{
|
||||
HEX: b5 HEX: bf HEX: c7 HEX: d8
|
||||
HEX: b9 HEX: b0 HEX: b0 HEX: fa
|
||||
HEX: 20 HEX: b9 HEX: e9 HEX: b5
|
||||
HEX: ce HEX: bb HEX: ea HEX: c0
|
||||
HEX: cc HEX: 21
|
||||
} ;
|
||||
|
||||
: phrase-unicode>cp949 ( -- s )
|
||||
phrase-unicode cp949 encode ;
|
||||
|
||||
: phrase-cp949>unicode ( -- s )
|
||||
phrase-cp949 cp949 decode ;
|
||||
|
||||
[ t ] [ phrase-unicode>cp949 >array phrase-cp949 = ] unit-test
|
||||
|
||||
[ t ] [ phrase-cp949>unicode phrase-unicode = ] unit-test
|
||||
|
||||
[ t ] [ phrase-cp949 1 head* cp949 decode phrase-unicode 1 head* = ] unit-test
|
||||
|
||||
[ t ] [ phrase-cp949 3 head* cp949 decode phrase-unicode 2 head* = ] unit-test
|
||||
|
||||
[ t ] [ phrase-cp949 2 head* cp949 decode phrase-unicode 2 head* CHAR: replacement-character suffix = ] unit-test
|
|
@ -1,81 +0,0 @@
|
|||
! Copyright (C) 2009 Yun, Jonghyouk.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: assocs byte-arrays combinators io io.encodings
|
||||
io.encodings.ascii io.encodings.iana io.files kernel locals math
|
||||
math.order math.parser memoize multiline sequences splitting
|
||||
values hashtables io.binary ;
|
||||
IN: io.encodings.korean
|
||||
|
||||
! TODO: migrate to common code-table parser (by Dan).
|
||||
|
||||
SINGLETON: cp949
|
||||
|
||||
cp949 "EUC-KR" register-encoding
|
||||
|
||||
<PRIVATE
|
||||
|
||||
! parse cp949.txt > table
|
||||
|
||||
: cp949.txt-lines ( -- seq )
|
||||
! "cp949.txt" from ...
|
||||
! <http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT>
|
||||
"vocab:io/encodings/korean/data/cp949.txt"
|
||||
ascii file-lines ;
|
||||
|
||||
: drop-comments ( seq -- newseq )
|
||||
[ "#" split1 drop ] map harvest ;
|
||||
|
||||
: split-column ( line -- columns )
|
||||
"\t" split 2 head ;
|
||||
|
||||
: parse-hex ( s -- n )
|
||||
2 short tail hex> ;
|
||||
|
||||
: parse-line ( line -- code-unicode )
|
||||
split-column [ parse-hex ] map ;
|
||||
|
||||
: process-codetable-lines ( lines -- assoc )
|
||||
drop-comments [ parse-line ] map ;
|
||||
|
||||
! convert cp949 <> unicode
|
||||
|
||||
MEMO: cp949>unicode-table ( -- hashtable )
|
||||
cp949.txt-lines process-codetable-lines >hashtable ;
|
||||
|
||||
MEMO: unicode>cp949-table ( -- hashtable )
|
||||
cp949>unicode-table [ swap ] assoc-map ;
|
||||
|
||||
unicode>cp949-table drop
|
||||
|
||||
: cp949>unicode ( b -- u )
|
||||
cp949>unicode-table at ;
|
||||
|
||||
: unicode>cp949 ( u -- b )
|
||||
unicode>cp949-table at ;
|
||||
|
||||
: cp949-1st? ( n -- ? )
|
||||
dup [ HEX: 81 HEX: fe between? ] when ;
|
||||
|
||||
: byte? ( n -- ? )
|
||||
0 HEX: ff between? ;
|
||||
|
||||
M:: cp949 encode-char ( char stream encoding -- )
|
||||
char unicode>cp949 byte?
|
||||
[ char 1byte-array stream stream-write ] [
|
||||
char unicode>cp949
|
||||
h>b/b swap 2byte-array
|
||||
stream stream-write
|
||||
] if ;
|
||||
|
||||
: decode-char-step2 ( c stream -- char )
|
||||
stream-read1
|
||||
[ 2byte-array be> cp949>unicode ]
|
||||
[ drop replacement-char ] if* ;
|
||||
|
||||
M:: cp949 decode-char ( stream encoding -- char/f )
|
||||
stream stream-read1
|
||||
{
|
||||
{ [ dup not ] [ drop f ] }
|
||||
{ [ dup cp949-1st? ] [ stream decode-char-step2 ] }
|
||||
[ ]
|
||||
} cond ;
|
|
@ -0,0 +1 @@
|
|||
Daniel Ehrenberg
|
|
@ -0,0 +1,208 @@
|
|||
#
|
||||
# Name: JIS X 0201 (1976) to Unicode 1.1 Table
|
||||
# Unicode version: 1.1
|
||||
# Table version: 0.9
|
||||
# Table format: Format A
|
||||
# Date: 8 March 1994
|
||||
#
|
||||
# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved.
|
||||
#
|
||||
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
|
||||
# No claims are made as to fitness for any particular purpose. No
|
||||
# warranties of any kind are expressed or implied. The recipient
|
||||
# agrees to determine applicability of information provided. If this
|
||||
# file has been provided on magnetic media by Unicode, Inc., the sole
|
||||
# remedy for any claim will be exchange of defective media within 90
|
||||
# days of receipt.
|
||||
#
|
||||
# Recipient is granted the right to make copies in any form for
|
||||
# internal distribution and to freely use the information supplied
|
||||
# in the creation of products supporting Unicode. Unicode, Inc.
|
||||
# specifically excludes the right to re-distribute this file directly
|
||||
# to third parties or other organizations whether for profit or not.
|
||||
#
|
||||
# General notes:
|
||||
#
|
||||
#
|
||||
# This table contains one set of mappings from JIS X 0201 into Unicode.
|
||||
# Note that these data are *possible* mappings only and may not be the
|
||||
# same as those used by actual products, nor may they be the best suited
|
||||
# for all uses. For more information on the mappings between various code
|
||||
# pages incorporating the repertoire of JIS X 0201 and Unicode, consult the
|
||||
# VENDORS mapping data. Normative information on the mapping between
|
||||
# JIS X 0201 and Unicode may be found in the Unihan.txt file in the
|
||||
# latest Unicode Character Database.
|
||||
#
|
||||
# If you have carefully considered the fact that the mappings in
|
||||
# this table are only one possible set of mappings between JIS X 0201 and
|
||||
# Unicode and have no normative status, but still feel that you
|
||||
# have located an error in the table that requires fixing, you may
|
||||
# report any such error to errata@unicode.org.
|
||||
#
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the shift JIS code (in hex as 0xXX)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 the Unicode (ISO 10646) name (follows a comment sign)
|
||||
#
|
||||
# The entries are in JIS order
|
||||
#
|
||||
#
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x00A5 # YEN SIGN
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x203E # OVERLINE
|
||||
0xA1 0xFF61 # HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
0xA2 0xFF62 # HALFWIDTH LEFT CORNER BRACKET
|
||||
0xA3 0xFF63 # HALFWIDTH RIGHT CORNER BRACKET
|
||||
0xA4 0xFF64 # HALFWIDTH IDEOGRAPHIC COMMA
|
||||
0xA5 0xFF65 # HALFWIDTH KATAKANA MIDDLE DOT
|
||||
0xA6 0xFF66 # HALFWIDTH KATAKANA LETTER WO
|
||||
0xA7 0xFF67 # HALFWIDTH KATAKANA LETTER SMALL A
|
||||
0xA8 0xFF68 # HALFWIDTH KATAKANA LETTER SMALL I
|
||||
0xA9 0xFF69 # HALFWIDTH KATAKANA LETTER SMALL U
|
||||
0xAA 0xFF6A # HALFWIDTH KATAKANA LETTER SMALL E
|
||||
0xAB 0xFF6B # HALFWIDTH KATAKANA LETTER SMALL O
|
||||
0xAC 0xFF6C # HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
0xAD 0xFF6D # HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
0xAE 0xFF6E # HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
0xAF 0xFF6F # HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
0xB0 0xFF70 # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
0xB1 0xFF71 # HALFWIDTH KATAKANA LETTER A
|
||||
0xB2 0xFF72 # HALFWIDTH KATAKANA LETTER I
|
||||
0xB3 0xFF73 # HALFWIDTH KATAKANA LETTER U
|
||||
0xB4 0xFF74 # HALFWIDTH KATAKANA LETTER E
|
||||
0xB5 0xFF75 # HALFWIDTH KATAKANA LETTER O
|
||||
0xB6 0xFF76 # HALFWIDTH KATAKANA LETTER KA
|
||||
0xB7 0xFF77 # HALFWIDTH KATAKANA LETTER KI
|
||||
0xB8 0xFF78 # HALFWIDTH KATAKANA LETTER KU
|
||||
0xB9 0xFF79 # HALFWIDTH KATAKANA LETTER KE
|
||||
0xBA 0xFF7A # HALFWIDTH KATAKANA LETTER KO
|
||||
0xBB 0xFF7B # HALFWIDTH KATAKANA LETTER SA
|
||||
0xBC 0xFF7C # HALFWIDTH KATAKANA LETTER SI
|
||||
0xBD 0xFF7D # HALFWIDTH KATAKANA LETTER SU
|
||||
0xBE 0xFF7E # HALFWIDTH KATAKANA LETTER SE
|
||||
0xBF 0xFF7F # HALFWIDTH KATAKANA LETTER SO
|
||||
0xC0 0xFF80 # HALFWIDTH KATAKANA LETTER TA
|
||||
0xC1 0xFF81 # HALFWIDTH KATAKANA LETTER TI
|
||||
0xC2 0xFF82 # HALFWIDTH KATAKANA LETTER TU
|
||||
0xC3 0xFF83 # HALFWIDTH KATAKANA LETTER TE
|
||||
0xC4 0xFF84 # HALFWIDTH KATAKANA LETTER TO
|
||||
0xC5 0xFF85 # HALFWIDTH KATAKANA LETTER NA
|
||||
0xC6 0xFF86 # HALFWIDTH KATAKANA LETTER NI
|
||||
0xC7 0xFF87 # HALFWIDTH KATAKANA LETTER NU
|
||||
0xC8 0xFF88 # HALFWIDTH KATAKANA LETTER NE
|
||||
0xC9 0xFF89 # HALFWIDTH KATAKANA LETTER NO
|
||||
0xCA 0xFF8A # HALFWIDTH KATAKANA LETTER HA
|
||||
0xCB 0xFF8B # HALFWIDTH KATAKANA LETTER HI
|
||||
0xCC 0xFF8C # HALFWIDTH KATAKANA LETTER HU
|
||||
0xCD 0xFF8D # HALFWIDTH KATAKANA LETTER HE
|
||||
0xCE 0xFF8E # HALFWIDTH KATAKANA LETTER HO
|
||||
0xCF 0xFF8F # HALFWIDTH KATAKANA LETTER MA
|
||||
0xD0 0xFF90 # HALFWIDTH KATAKANA LETTER MI
|
||||
0xD1 0xFF91 # HALFWIDTH KATAKANA LETTER MU
|
||||
0xD2 0xFF92 # HALFWIDTH KATAKANA LETTER ME
|
||||
0xD3 0xFF93 # HALFWIDTH KATAKANA LETTER MO
|
||||
0xD4 0xFF94 # HALFWIDTH KATAKANA LETTER YA
|
||||
0xD5 0xFF95 # HALFWIDTH KATAKANA LETTER YU
|
||||
0xD6 0xFF96 # HALFWIDTH KATAKANA LETTER YO
|
||||
0xD7 0xFF97 # HALFWIDTH KATAKANA LETTER RA
|
||||
0xD8 0xFF98 # HALFWIDTH KATAKANA LETTER RI
|
||||
0xD9 0xFF99 # HALFWIDTH KATAKANA LETTER RU
|
||||
0xDA 0xFF9A # HALFWIDTH KATAKANA LETTER RE
|
||||
0xDB 0xFF9B # HALFWIDTH KATAKANA LETTER RO
|
||||
0xDC 0xFF9C # HALFWIDTH KATAKANA LETTER WA
|
||||
0xDD 0xFF9D # HALFWIDTH KATAKANA LETTER N
|
||||
0xDE 0xFF9E # HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
0xDF 0xFF9F # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,14 +1,14 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.markup help.syntax ;
|
||||
IN: io.encodings.japanese
|
||||
IN: io.encodings.shift-jis
|
||||
|
||||
ARTICLE: "io.encodings.japanese" "Japanese text encodings"
|
||||
"Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete."
|
||||
ARTICLE: "io.encodings.shift-jis" "Shift JIS"
|
||||
"Shift JIS is a text encoding for Japanese. There are multiple versions, depending on whether the offical standard or the modified Microsoft version is required."
|
||||
{ $subsection shift-jis }
|
||||
{ $subsection windows-31j } ;
|
||||
|
||||
ABOUT: "io.encodings.japanese"
|
||||
ABOUT: "io.encodings.shift-jis"
|
||||
|
||||
HELP: windows-31j
|
||||
{ $class-description "The encoding descriptor Windows-31J, which is sometimes informally called Shift JIS. This is based on Code Page 932." }
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io.encodings.japanese tools.test io.encodings.string arrays strings ;
|
||||
IN: io.encodings.japanese.tests
|
||||
USING: io.encodings.shift-jis tools.test io.encodings.string arrays strings ;
|
||||
IN: io.encodings.shift-jis.tests
|
||||
|
||||
[ { CHAR: replacement-character } ] [ { 141 } shift-jis decode >array ] unit-test
|
||||
[ "" ] [ "" shift-jis decode >string ] unit-test
|
|
@ -3,8 +3,9 @@
|
|||
USING: sequences kernel io io.files combinators.short-circuit
|
||||
math.order values assocs io.encodings io.binary fry strings math
|
||||
io.encodings.ascii arrays byte-arrays accessors splitting
|
||||
math.parser biassocs io.encodings.iana ;
|
||||
IN: io.encodings.japanese
|
||||
math.parser biassocs io.encodings.iana
|
||||
locals multiline combinators simple-flat-file ;
|
||||
IN: io.encodings.shift-jis
|
||||
|
||||
SINGLETON: shift-jis
|
||||
|
||||
|
@ -28,26 +29,16 @@ M: windows-31j <decoder> drop windows-31j-table <decoder> ;
|
|||
|
||||
TUPLE: jis assoc ;
|
||||
|
||||
: <jis> ( assoc -- jis )
|
||||
[ nip ] assoc-filter
|
||||
>biassoc jis boa ;
|
||||
|
||||
: ch>jis ( ch tuple -- jis ) assoc>> value-at [ encode-error ] unless* ;
|
||||
: jis>ch ( jis tuple -- string ) assoc>> at replacement-char or ;
|
||||
|
||||
: process-jis ( lines -- assoc )
|
||||
[ "#" split1 drop ] map harvest [
|
||||
"\t" split 2 head
|
||||
[ 2 short tail hex> ] map
|
||||
] map ;
|
||||
|
||||
: make-jis ( filename -- jis )
|
||||
ascii file-lines process-jis <jis> ;
|
||||
flat-file>biassoc [ nip ] assoc-filter jis boa ;
|
||||
|
||||
"vocab:io/encodings/japanese/CP932.txt"
|
||||
"vocab:io/encodings/shift-jis/CP932.txt"
|
||||
make-jis to: windows-31j-table
|
||||
|
||||
"vocab:io/encodings/japanese/sjis-0208-1997-std.txt"
|
||||
"vocab:io/encodings/shift-jis/sjis-0208-1997-std.txt"
|
||||
make-jis to: shift-jis-table
|
||||
|
||||
: small? ( char -- ? )
|
||||
|
@ -71,5 +62,3 @@ M: jis decode-char
|
|||
[ 2drop replacement-char ] if*
|
||||
] if
|
||||
] [ 2drop f ] if* ;
|
||||
|
||||
PRIVATE>
|
|
@ -0,0 +1 @@
|
|||
Daniel Ehrenberg
|
|
@ -0,0 +1,8 @@
|
|||
USING: help.syntax help.markup strings ;
|
||||
IN: simple-flat-file
|
||||
|
||||
ABOUT: "simple-flat-file"
|
||||
|
||||
ARTICLE: "simple-flat-file" "Parsing simple flat files"
|
||||
"The " { $vocab-link "simple-flat-file" } " vocabulary provides words for loading and parsing simple flat files in a particular format which is common for encoding tasks."
|
||||
{ $subsection flat-file>biassoc } ;
|
|
@ -0,0 +1,23 @@
|
|||
! Copyright (C) 2009 Yun, Jonghyouk.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: simple-flat-file tools.test memoize ;
|
||||
IN: simple-flat-file.tests
|
||||
|
||||
|
||||
MEMO: <test1> ( -- code-table )
|
||||
"vocab:simple-flat-file/test1.txt" flat-file>biassoc ;
|
||||
|
||||
|
||||
[ 0 ] [ 0 <test1> at ] unit-test
|
||||
[ 0 ] [ 0 <test1> value-at ] unit-test
|
||||
|
||||
[ 3 ] [ 3 <test1> at ] unit-test
|
||||
[ 3 ] [ 3 <test1> value-at ] unit-test
|
||||
|
||||
[ HEX: AD2A ] [ HEX: 8253 <test1> at ] unit-test
|
||||
[ HEX: 8253 ] [ HEX: AD2A <test1> value-at ] unit-test
|
||||
|
||||
[ HEX: AD31 ] [ HEX: 8258 <test1> at ] unit-test
|
||||
[ HEX: 8258 ] [ HEX: AD31 <test1> value-at ] unit-test
|
||||
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: sequences splitting kernel math.parser io.files io.encodings.ascii biassocs ;
|
||||
IN: simple-flat-file
|
||||
|
||||
: drop-comments ( seq -- newseq )
|
||||
[ "#" split1 drop ] map harvest ;
|
||||
|
||||
: split-column ( line -- columns )
|
||||
"\t" split 2 head ;
|
||||
|
||||
: parse-hex ( s -- n )
|
||||
2 short tail hex> ;
|
||||
|
||||
: parse-line ( line -- code-unicode )
|
||||
split-column [ parse-hex ] map ;
|
||||
|
||||
: process-codetable-lines ( lines -- assoc )
|
||||
drop-comments [ parse-line ] map ;
|
||||
|
||||
: flat-file>biassoc ( filename -- biassoc )
|
||||
ascii file-lines process-codetable-lines >biassoc ;
|
||||
|
|
@ -2,13 +2,6 @@ IN: xmode.utilities.tests
|
|||
USING: accessors xmode.utilities tools.test xml xml.data kernel
|
||||
strings vectors sequences io.files prettyprint assocs
|
||||
unicode.case ;
|
||||
[ "hi" 3 ] [
|
||||
{ 1 2 3 4 5 6 7 8 } [ H{ { 3 "hi" } } at ] map-find
|
||||
] unit-test
|
||||
|
||||
[ f f ] [
|
||||
{ 1 2 3 4 5 6 7 8 } [ H{ { 11 "hi" } } at ] map-find
|
||||
] unit-test
|
||||
|
||||
TUPLE: company employees type ;
|
||||
|
||||
|
|
|
@ -6,11 +6,6 @@ IN: xmode.utilities
|
|||
|
||||
: child-tags ( tag -- seq ) children>> [ tag? ] filter ;
|
||||
|
||||
: map-find ( seq quot -- result elt )
|
||||
[ f ] 2dip
|
||||
'[ nip @ dup ] find
|
||||
[ [ drop f ] unless ] dip ; inline
|
||||
|
||||
: tag-init-form ( spec -- quot )
|
||||
{
|
||||
{ [ dup quotation? ] [ [ object get tag get ] prepose ] }
|
||||
|
|
|
@ -10,14 +10,14 @@ SLOT: i
|
|||
[ i>> ] [ underlying>> ] bi ; inline
|
||||
|
||||
: next ( stream -- )
|
||||
[ 1+ ] change-i drop ;
|
||||
[ 1+ ] change-i drop ; inline
|
||||
|
||||
: sequence-read1 ( stream -- elt/f )
|
||||
[ >sequence-stream< ?nth ]
|
||||
[ next ] bi ; inline
|
||||
|
||||
: add-length ( n stream -- i+n )
|
||||
[ i>> + ] [ underlying>> length ] bi min ;
|
||||
[ i>> + ] [ underlying>> length ] bi min ; inline
|
||||
|
||||
: (sequence-read) ( n stream -- seq/f )
|
||||
[ add-length ] keep
|
||||
|
|
|
@ -397,6 +397,10 @@ HELP: find-last-from
|
|||
{ $values { "n" "a starting index" } { "seq" sequence } { "quot" { $quotation "( elt -- ? )" } } { "i" "the index of the first match, or f" } { "elt" "the first matching element, or " { $link f } } }
|
||||
{ $description "Applies the quotation to each element of the sequence in reverse order, until it outputs a true value or the start of the sequence is reached. If the quotation yields a true value for some sequence element, the word outputs the element index and the element itself. Otherwise, the word outputs an index of f and " { $link f } " as the element." } ;
|
||||
|
||||
HELP: map-find
|
||||
{ $values { "seq" sequence } { "quot" { $quotation "( elt -- result/f )" } } { "result" "the first non-false result of the quotation" } { "elt" "the first matching element, or " { $link f } } }
|
||||
{ $description "Applies the quotation to each element of the sequence, until the quotation outputs a true value. If the quotation ever yields a result which is not " { $link f } ", then the value is output, along with the element of the sequence which yielded this." } ;
|
||||
|
||||
HELP: any?
|
||||
{ $values { "seq" sequence } { "quot" { $quotation "( elt -- ? )" } } { "?" "a boolean" } }
|
||||
{ $description "Tests if the sequence contains an element satisfying the predicate, by applying the predicate to each element in turn until a true value is found. If the sequence is empty or if the end of the sequence is reached, outputs " { $link f } "." } ;
|
||||
|
@ -1455,6 +1459,7 @@ ARTICLE: "sequences-combinators" "Sequence combinators"
|
|||
{ $subsection map }
|
||||
{ $subsection map-as }
|
||||
{ $subsection map-index }
|
||||
{ $subsection map-reduce }
|
||||
{ $subsection accumulate }
|
||||
{ $subsection produce }
|
||||
{ $subsection produce-as }
|
||||
|
@ -1473,6 +1478,7 @@ ARTICLE: "sequence-2combinators" "Pair-wise sequence combinators"
|
|||
{ $subsection 2reduce }
|
||||
{ $subsection 2map }
|
||||
{ $subsection 2map-as }
|
||||
{ $subsection 2map-reduce }
|
||||
{ $subsection 2all? } ;
|
||||
|
||||
ARTICLE: "sequence-3combinators" "Triple-wise sequence combinators"
|
||||
|
@ -1507,7 +1513,8 @@ ARTICLE: "sequences-search" "Searching sequences"
|
|||
{ $subsection find }
|
||||
{ $subsection find-from }
|
||||
{ $subsection find-last }
|
||||
{ $subsection find-last-from } ;
|
||||
{ $subsection find-last-from }
|
||||
{ $subsection map-find } ;
|
||||
|
||||
ARTICLE: "sequences-trimming" "Trimming sequences"
|
||||
"Trimming words:"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
USING: arrays kernel math namespaces sequences kernel.private
|
||||
sequences.private strings sbufs tools.test vectors
|
||||
sequences.private strings sbufs tools.test vectors assocs
|
||||
generic vocabs.loader ;
|
||||
IN: sequences.tests
|
||||
|
||||
|
@ -274,3 +274,11 @@ M: bogus-hashcode hashcode* 2drop 0 >bignum ;
|
|||
[ "asdf" iota ] must-fail
|
||||
[ T{ iota { n 10 } } ] [ 10 iota ] unit-test
|
||||
[ 0 ] [ 10 iota first ] unit-test
|
||||
|
||||
[ "hi" 3 ] [
|
||||
{ 1 2 3 4 5 6 7 8 } [ H{ { 3 "hi" } } at ] map-find
|
||||
] unit-test
|
||||
|
||||
[ f f ] [
|
||||
{ 1 2 3 4 5 6 7 8 } [ H{ { 11 "hi" } } at ] map-find
|
||||
] unit-test
|
||||
|
|
|
@ -815,6 +815,11 @@ PRIVATE>
|
|||
[ [ 2unclip-slice ] dip [ call ] keep ] dip
|
||||
compose 2reduce ; inline
|
||||
|
||||
: map-find ( seq quot -- result elt )
|
||||
[ f ] 2dip
|
||||
[ [ nip ] dip call dup ] curry find
|
||||
[ [ drop f ] unless ] dip ; inline
|
||||
|
||||
: unclip-last-slice ( seq -- butlast-slice last )
|
||||
[ but-last-slice ] [ peek ] bi ; inline
|
||||
|
||||
|
|
|
@ -5,7 +5,8 @@ strings kernel math io.mmap io.mmap.uchar accessors syntax
|
|||
combinators math.ranges unicode.categories byte-arrays
|
||||
io.encodings.string io.encodings.utf16 assocs math.parser
|
||||
combinators.short-circuit fry namespaces combinators.smart
|
||||
splitting io.encodings.ascii arrays ;
|
||||
splitting io.encodings.ascii arrays io.files.info unicode.case
|
||||
io.directories.search ;
|
||||
IN: id3
|
||||
|
||||
<PRIVATE
|
||||
|
@ -179,7 +180,7 @@ PRIVATE>
|
|||
|
||||
: id3-frame ( id3 key -- value/f ) [ ] frame-named ; inline
|
||||
|
||||
: file-id3-tags ( path -- id3v2-info/f )
|
||||
: (file-id3-tags) ( path -- id3v2-info/f )
|
||||
[
|
||||
{
|
||||
{ [ dup id3v2? ] [ read-v2-tag-data ] }
|
||||
|
@ -187,3 +188,10 @@ PRIVATE>
|
|||
[ drop f ]
|
||||
} cond
|
||||
] with-mapped-uchar-file ;
|
||||
|
||||
: file-id3-tags ( path -- id3v2-info/f )
|
||||
dup file-info size>> 0 <= [ drop f ] [ (file-id3-tags) ] if ;
|
||||
|
||||
: parse-id3s ( path -- seq )
|
||||
[ >lower ".mp3" tail? ] find-all-files
|
||||
[ dup file-id3-tags ] { } map>assoc ;
|
||||
|
|
|
@ -10,7 +10,7 @@ TUPLE: image-gadget < gadget { image image } ;
|
|||
M: image-gadget pref-dim*
|
||||
image>> dim>> ;
|
||||
|
||||
: draw-image ( tiff -- )
|
||||
: draw-image ( image -- )
|
||||
0 0 glRasterPos2i 1.0 -1.0 glPixelZoom
|
||||
[ dim>> first2 GL_RGBA GL_UNSIGNED_BYTE ]
|
||||
[ bitmap>> ] bi glDrawPixels ;
|
||||
|
|
Loading…
Reference in New Issue