Merge branch 'master' of git://factorcode.org/git/factor

2009-03-04 21:36:34 -06:00 · 2009-03-04 21:36:34 -06:00 · 71f729499d
parent 03cb32ad9c 6ee3f802a4
commit 71f729499d
57 changed files with 56245 additions and 245 deletions
--- a/basis/images/bitmap/bitmap-tests.factor
+++ b/basis/images/bitmap/bitmap-tests.factor
@ -1,5 +1,5 @@
 USING: images.bitmap images.viewer io.encodings.binary
-io.files io.files.unique kernel tools.test ;
+io.files io.files.unique kernel tools.test images.loader ;
 IN: images.bitmap.tests

 : test-bitmap24 ( -- path )
@ -17,7 +17,7 @@ IN: images.bitmap.tests
 [ t ]
 [
    test-bitmap24
-    [ binary file-contents ] [ load-bitmap ] bi
+    [ binary file-contents ] [ load-image ] bi

    "test-bitmap24" unique-file
    [ save-bitmap ] [ binary file-contents ] bi =
--- a/basis/images/bitmap/bitmap.factor
+++ b/basis/images/bitmap/bitmap.factor
@ -6,15 +6,20 @@ kernel macros math math.bitwise math.functions namespaces sequences
 strings images endian summary ;
 IN: images.bitmap

-TUPLE: bitmap-image < image ;
+TUPLE: bitmap-image < image
+magic size reserved offset header-length width
+height planes bit-count compression size-image
+x-pels y-pels color-used color-important rgb-quads color-index ;

 ! Currently can only handle 24/32bit bitmaps.
 ! Handles row-reversed bitmaps (their height is negative)

-TUPLE: bitmap magic size reserved offset header-length width
-height planes bit-count compression size-image
-x-pels y-pels color-used color-important rgb-quads color-index
-buffer ;
+ERROR: bitmap-magic magic ;
+
+M: bitmap-magic summary
+    drop "First two bytes of bitmap stream must be 'BM'" ;
+
+<PRIVATE

 : array-copy ( bitmap array -- bitmap array' )
    over size-image>> abs memory>byte-array ;
@ -37,16 +42,11 @@ ERROR: bmp-not-supported n ;
        { 1 [ bmp-not-supported ] }
    } case >byte-array ;

-ERROR: bitmap-magic ;
-
-M: bitmap-magic summary
-    drop "First two bytes of bitmap stream must be 'BM'" ;
-
 : read2 ( -- n ) 2 read le> ;
 : read4 ( -- n ) 4 read le> ;

 : parse-file-header ( bitmap -- bitmap )
-    2 read >string dup "BM" = [ bitmap-magic ] unless >>magic
+    2 read dup "BM" sequence= [ bitmap-magic ] unless >>magic
    read4 >>size
    read4 >>reserved
    read4 >>offset ;
@ -79,17 +79,13 @@ M: bitmap-magic summary
    dup rgb-quads-length read >>rgb-quads
    dup color-index-length read >>color-index ;

-: load-bitmap-data ( path -- bitmap )
-    binary [
-        bitmap new
-        parse-file-header parse-bitmap-header parse-bitmap
+: load-bitmap-data ( path bitmap -- bitmap )
+    [ binary ] dip '[
+        _ parse-file-header parse-bitmap-header parse-bitmap
    ] with-file-reader ;

 : process-bitmap-data ( bitmap -- bitmap )
-    dup raw-bitmap>buffer >>buffer ;
-
-: load-bitmap ( path -- bitmap )
-    load-bitmap-data process-bitmap-data ;
+    dup raw-bitmap>buffer >>bitmap ;

 ERROR: unknown-component-order bitmap ;

@ -101,15 +97,16 @@ ERROR: unknown-component-order bitmap ;
        [ unknown-component-order ]
    } case ;

-: >image ( bitmap -- bitmap-image )
-    {
-        [ [ width>> ] [ height>> ] bi 2array ]
-        [ bitmap>component-order ]
-        [ buffer>> ]
-    } cleave bitmap-image boa ;
+: fill-image-slots ( bitmap -- bitmap )
+    dup {
+        [ [ width>> ] [ height>> ] bi 2array >>dim ]
+        [ bitmap>component-order >>component-order ]
+        [ bitmap>> >>bitmap ]
+    } cleave ;

-M: bitmap-image load-image* ( path bitmap -- bitmap-image )
-    drop load-bitmap >image ;
+M: bitmap-image load-image* ( path bitmap -- bitmap )
+    load-bitmap-data process-bitmap-data
+    fill-image-slots ;

 M: bitmap-image normalize-scan-line-order
    dup dim>> '[
@ -118,12 +115,12 @@ M: bitmap-image normalize-scan-line-order

 MACRO: (nbits>bitmap) ( bits -- )
    [ -3 shift ] keep '[
-        bitmap new
+        bitmap-image new
            2over * _ * >>size-image
            swap >>height
            swap >>width
-            swap array-copy [ >>buffer ] [ >>color-index ] bi
-            _ >>bit-count >image
+            swap array-copy [ >>bitmap ] [ >>color-index ] bi
+            _ >>bit-count fill-image-slots
    ] ;

 : bgr>bitmap ( array height width -- bitmap )
@ -135,11 +132,13 @@ MACRO: (nbits>bitmap) ( bits -- )
 : write2 ( n -- ) 2 >le write ;
 : write4 ( n -- ) 4 >le write ;

+PRIVATE>
+
 : save-bitmap ( bitmap path -- )
    binary [
        B{ CHAR: B CHAR: M } write
        [
-            buffer>> length 14 + 40 + write4
+            color-index>> length 14 + 40 + write4
            0 write4
            54 write4
            40 write4
--- a/basis/images/images.factor
+++ b/basis/images/images.factor
@ -68,7 +68,7 @@ GENERIC: load-image* ( path tuple -- image )
            [
                3 <sliced-groups>
                [ [ 3 head-slice reverse-here ] each ]
-                [ add-dummy-alpha ] bi
+                [ [ 255 suffix ] map ] bi concat
            ] change-bitmap
        ] }
    } case
@ -81,4 +81,4 @@ M: image normalize-scan-line-order ;
 : normalize-image ( image -- image )
    [ >byte-array ] change-bitmap
    normalize-component-order
-    normalize-scan-line-order ;
+    normalize-scan-line-order ;
--- a/basis/images/png/authors.txt
+++ b/basis/images/png/authors.txt
@ -0,0 +1 @@
+Doug Coleman
--- a/basis/images/png/png-tests.factor
+++ b/basis/images/png/png-tests.factor
@ -0,0 +1,7 @@
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: tools.test images.png ;
+IN: images.png.tests
+
+: png-test-path ( -- path )
+    "vocab:images/test-images/rgb.png" ;
--- a/basis/images/png/png.factor
+++ b/basis/images/png/png.factor
@ -0,0 +1,41 @@
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors constructors images io io.binary io.encodings.ascii
+io.encodings.binary io.encodings.string io.files io.files.info kernel
+sequences io.streams.limited ;
+IN: images.png
+
+TUPLE: png-image < image chunks ;
+
+CONSTRUCTOR: png-image ( -- image )
+V{ } clone >>chunks ;
+
+TUPLE: png-chunk length type data crc ;
+
+CONSTRUCTOR: png-chunk ( -- png-chunk ) ;
+
+CONSTANT: png-header B{ HEX: 89 HEX: 50 HEX: 4e HEX: 47 HEX: 0d HEX: 0a HEX: 1a HEX: 0a }
+
+ERROR: bad-png-header header ;
+
+: read-png-header ( -- )
+    8 read dup png-header sequence= [
+        bad-png-header
+    ] unless drop ;
+
+: read-png-chunks ( image -- image )
+    <png-chunk>
+    4 read be> >>length
+    4 read ascii decode >>type
+    dup length>> read >>data
+    4 read >>crc
+    [ over chunks>> push ] 
+    [ type>> ] bi "IEND" =
+    [ read-png-chunks ] unless ;
+
+: load-png ( path -- image )
+    [ binary <file-reader> ] [ file-info size>> ] bi stream-throws <limited-stream> [
+        <png-image>
+        read-png-header
+        read-png-chunks
+    ] with-input-stream ;
--- a/basis/images/test-images/rgb.png
+++ b/basis/images/test-images/rgb.png
--- a/basis/io/encodings/8-bit/8-bit.factor
+++ b/basis/io/encodings/8-bit/8-bit.factor
@ -4,7 +4,7 @@ USING: math.parser arrays io.encodings sequences kernel assocs
 hashtables io.encodings.ascii generic parser classes.tuple words
 words.symbol io io.files splitting namespaces math
 compiler.units accessors classes.singleton classes.mixin
-io.encodings.iana fry ;
+io.encodings.iana fry simple-flat-file ;
 IN: io.encodings.8-bit

 <PRIVATE
@ -35,36 +35,22 @@ CONSTANT: mappings {
 : encoding-file ( file-name -- stream )
    "vocab:io/encodings/8-bit/" ".TXT" surround ;

-: process-contents ( lines -- assoc )
-    [ "#" split1 drop ] map harvest
-    [ "\t" split 2 head [ 2 short tail hex> ] map ] map ;
-
-: byte>ch ( assoc -- array )
-    256 replacement-char <array>
-    [ '[ swap _ set-nth ] assoc-each ] keep ;
-
-: ch>byte ( assoc -- newassoc )
-    [ swap ] assoc-map >hashtable ;
-
-: parse-file ( path -- byte>ch ch>byte )
-    ascii file-lines process-contents
-    [ byte>ch ] [ ch>byte ] bi ;
-
 SYMBOL: 8-bit-encodings

-TUPLE: 8-bit decode encode ;
+TUPLE: 8-bit biassoc ;

 : encode-8-bit ( char stream assoc -- )
-    swapd at*
-    [ swap stream-write1 ] [ nip encode-error ] if ; inline
+    swapd value-at
+    [ swap stream-write1 ] [ encode-error ] if* ; inline

-M: 8-bit encode-char encode>> encode-8-bit ;
+M: 8-bit encode-char biassoc>> encode-8-bit ;

-: decode-8-bit ( stream array -- char/f )
-    swap stream-read1 dup
-    [ swap nth [ replacement-char ] unless* ] [ 2drop f ] if ; inline
+: decode-8-bit ( stream assoc -- char/f )
+    swap stream-read1
+    [ swap at [ replacement-char ] unless* ]
+    [ drop f ] if* ; inline

-M: 8-bit decode-char decode>> decode-8-bit ;
+M: 8-bit decode-char biassoc>> decode-8-bit ;

 MIXIN: 8-bit-encoding

@ -87,7 +73,7 @@ PRIVATE>
        first3
        [ create-encoding ]
        [ dupd register-encoding ]
-        [ encoding-file parse-file 8-bit boa ]
+        [ encoding-file flat-file>biassoc 8-bit boa ]
        tri*
    ] H{ } map>assoc
    8-bit-encodings set-global
--- a/basis/io/encodings/big5/CP950.TXT
+++ b/basis/io/encodings/big5/CP950.TXT
--- a/basis/io/encodings/big5/big5.factor
+++ b/basis/io/encodings/big5/big5.factor
@ -0,0 +1,9 @@
+! Copyright (C) 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: io.encodings.iana io.encodings.euc ;
+IN: io.encodings.big5
+
+EUC: big5 "vocab:io/encodings/big5/CP950.txt"
+
+big5 "Big5" register-encoding
+
--- a/basis/io/encodings/japanese/authors.txt
+++ b/basis/io/encodings/japanese/authors.txt
--- a/basis/io/encodings/euc-jp/euc-jp-2000-std.txt
+++ b/basis/io/encodings/euc-jp/euc-jp-2000-std.txt
--- a/basis/io/encodings/euc-jp/euc-jp.factor
+++ b/basis/io/encodings/euc-jp/euc-jp.factor
@ -0,0 +1,8 @@
+! Copyright (C) 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: io.encodings.euc io.encodings.iana ;
+IN: io.encodings.euc-jp
+
+EUC: euc-jp "vocab:io/encodings/euc-jp/euc-jp-2000-std.txt"
+
+euc-jp "Extended_UNIX_Code_Packed_Format_for_Japanese" register-encoding
--- a/basis/io/encodings/euc-jp/summary.txt
+++ b/basis/io/encodings/euc-jp/summary.txt
@ -0,0 +1 @@
+EUC-JP text encoding
--- a/basis/io/encodings/euc-kr/authors.txt
+++ b/basis/io/encodings/euc-kr/authors.txt
--- a/basis/io/encodings/euc-kr/data/cp949.txt
+++ b/basis/io/encodings/euc-kr/data/cp949.txt
--- a/basis/io/encodings/euc-kr/euc-kr-docs.factor
+++ b/basis/io/encodings/euc-kr/euc-kr-docs.factor
@ -1,14 +1,10 @@
 ! Copyright (C) 2009 Yun, Jonghyouk.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: help.syntax help.markup ;
-IN: io.encodings.korean
+IN: io.encodings.euc-kr

-ARTICLE: "io.encodings.korean" "Korean text encodings"
-"The " { $vocab-link "io.encodings.korean" } " vocabulary implements encodings used for Korean text besides the standard UTF encodings for Unicode strings."
-{ $subsection cp949 } ;
+ABOUT: euc-kr

-ABOUT: "io.encodings.korean"
-
-HELP: cp949
-{ $class-description "This encoding class implements Microsoft's code page #949 encoding, also called Unified Hangul Code or ks_c_5601-1987, UHC. CP949 is extended version of EUC-KR and downward-compatibility to EUC-KR. " }
+HELP: euc-kr
+{ $class-description "This encoding class implements Microsoft's code page #949 encoding, also called Unified Hangul Code or ks_c_5601-1987, UHC. CP949 is extended version of EUC-KR and downward-compatibility to EUC-KR, in practice." }
 { $see-also "encodings-introduction" } ;
--- a/basis/io/encodings/euc-kr/euc-kr.factor
+++ b/basis/io/encodings/euc-kr/euc-kr.factor
@ -0,0 +1,8 @@
+! Copyright (C) 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: io.encodings.iana io.encodings.euc ;
+IN: io.encodings.euc-kr
+
+EUC: euc-kr "vocab:io/encodings/euc-kr/data/cp949.txt"
+
+euc-kr "EUC-KR" register-encoding
--- a/basis/io/encodings/euc-kr/summary.txt
+++ b/basis/io/encodings/euc-kr/summary.txt
--- a/basis/io/encodings/euc/euc-tests.factor
+++ b/basis/io/encodings/euc/euc-tests.factor
@ -0,0 +1,50 @@
+! Copyright (C) 2009 Yun, Jonghyouk.
+! See http://factorcode.org/license.txt for BSD license.
+USING: arrays byte-arrays io io.encodings io.encodings.euc-kr assocs
+io.encodings.string io.streams.string io.encodings.euc.private words
+kernel locals multiline namespaces sequences strings tools.test ;
+IN: io.encodings.euc.tests
+
+: euc-kr>unicode ( ch -- ch/f )
+    euc-kr euc-table word-prop at ;
+
+: unicode>euc-kr ( ch -- ch/f )
+    euc-kr euc-table word-prop value-at ;
+
+[ f ] [ HEX: 80 euc-kr>unicode ] unit-test
+[ f ] [ HEX: ff euc-kr>unicode ] unit-test
+[ HEX: ac02 ] [ HEX: 8141 euc-kr>unicode ] unit-test
+[ HEX: 7f ] [ HEX: 7f euc-kr>unicode ] unit-test
+[ HEX: c724 ] [ HEX: c0b1 euc-kr>unicode ] unit-test
+
+[ HEX: 8141 ] [ HEX: ac02 unicode>euc-kr ] unit-test
+[ HEX: 7f ] [ HEX: 7f unicode>euc-kr ] unit-test
+[ HEX: c0b1 ] [ HEX: c724 unicode>euc-kr ] unit-test
+
+: phrase-unicode ( -- s )
+    "\u00b3d9\u00d574\u00bb3c\u00acfc \u00bc31\u00b450\u00c0b0\u00c774!" ;
+
+: phrase-euc-kr ( -- s )
+    {
+        HEX: b5 HEX: bf HEX: c7 HEX: d8
+        HEX: b9 HEX: b0 HEX: b0 HEX: fa
+        HEX: 20 HEX: b9 HEX: e9 HEX: b5
+        HEX: ce HEX: bb HEX: ea HEX: c0
+        HEX: cc HEX: 21
+    } ;
+
+: phrase-unicode>euc-kr ( -- s )
+    phrase-unicode euc-kr encode ;
+
+: phrase-euc-kr>unicode ( -- s )
+    phrase-euc-kr euc-kr decode ;
+
+[ t ] [ phrase-unicode>euc-kr >array phrase-euc-kr = ] unit-test
+
+[ t ]  [ phrase-euc-kr>unicode phrase-unicode = ] unit-test
+
+[ t ] [ phrase-euc-kr 1 head* euc-kr decode phrase-unicode 1 head* = ] unit-test
+
+[ t ] [ phrase-euc-kr 3 head* euc-kr decode phrase-unicode 2 head* = ] unit-test
+
+[ t ] [ phrase-euc-kr 2 head* euc-kr decode phrase-unicode 2 head* CHAR: replacement-character suffix = ] unit-test
--- a/basis/io/encodings/euc/euc.factor
+++ b/basis/io/encodings/euc/euc.factor
@ -0,0 +1,68 @@
+! Copyright (C) 2009 Daniel Ehrenberg, Jonghyouk Yun.
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel io.encodings accessors assocs sequences biassocs generic
+math.order simple-flat-file io io.binary byte-arrays locals combinators
+words classes.singleton fry classes.parser parser quotations ;
+IN: io.encodings.euc
+
+TUPLE: euc { table biassoc } ;
+
+<PRIVATE
+
+: byte? ( ch -- ? )
+    HEX: 0 HEX: ff between? ;
+
+M: euc encode-char ( char stream encoding -- )
+    swapd table>> value-at [
+        dup byte?
+        [ swap stream-write1 ] [
+            h>b/b swap 2byte-array
+            swap stream-write
+        ] if
+    ] [ encode-error ] if* ;
+
+: euc-multibyte? ( ch -- ? )
+    HEX: 81 HEX: fe between? ;
+
+:: decode-multibyte ( ch stream encoding -- char )
+    stream stream-read1
+    [ ch swap 2byte-array be> encoding table>> at ]
+    [ replacement-char ] if* ;
+
+M:: euc decode-char ( stream encoding -- char/f )
+    stream stream-read1
+    {
+        { [ dup not ] [ drop f ] }
+        { [ dup euc-multibyte? ] [ stream encoding decode-multibyte ] }
+        [ encoding table>> at ]
+    } cond ;
+
+: define-method ( class word definition -- )
+    [ create-method ] dip define ;
+
+SYMBOL: euc-table
+
+: setup-euc ( word file-name -- singleton-class biassoc )
+    [ dup define-singleton-class ]
+    [ flat-file>biassoc ] bi* ;
+
+:: define-recursive-methods ( class data words -- )
+    words [| word |
+        class word [ drop data word execute ] define-method
+    ] each ;
+
+: euc-methods ( singleton-class biassoc -- )
+    [ euc-table set-word-prop ] [
+        euc boa
+        { <encoder> <decoder> }
+        define-recursive-methods
+    ] 2bi ;
+
+: define-euc ( word file-name -- )
+    setup-euc euc-methods ;
+
+PRIVATE>
+
+: EUC:
+    ! EUC: euc-kr "vocab:io/encodings/korean/cp949.txt"
+    CREATE-CLASS scan-object define-euc ; parsing
--- a/basis/io/encodings/gb18030/authors.txt
+++ b/basis/io/encodings/gb18030/authors.txt
@ -0,0 +1 @@
+Daniel Ehrenberg
--- a/basis/io/encodings/gb18030/gb-18030-2000.xml
+++ b/basis/io/encodings/gb18030/gb-18030-2000.xml
--- a/basis/io/encodings/gb18030/gb18030-docs.factor
+++ b/basis/io/encodings/gb18030/gb18030-docs.factor
@ -1,13 +1,13 @@
 ! Copyright (C) 2009 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
 USING: help.syntax help.markup ;
-IN: io.encodings.chinese
+IN: io.encodings.gb18030

-ARTICLE: "io.encodings.chinese" "Chinese text encodings"
-"The " { $vocab-link "io.encodings.chinese" } " vocabulary implements encodings used for Chinese text besides the standard UTF encodings for Unicode strings."
+ARTICLE: "io.encodings.gb18030" "GB 18030"
+"The " { $vocab-link "io.encodings.gb18030" } " vocabulary implements GB18030, a commonly used encoding for Chinese text besides the standard UTF encodings for Unicode strings."
 { $subsection gb18030 } ;

-ABOUT: "io.encodings.chinese"
+ABOUT: "io.encodings.gb18030"

 HELP: gb18030
 { $class-description "The encoding descriptor for GB 18030, a Chinese national standard for text encoding. GB 18030 consists of a unique encoding for each Unicode code point, and for this reason has been described as a UTF. It is backwards compatible with the earlier encodings GB 2312 and GBK." }
--- a/basis/io/encodings/gb18030/gb18030-tests.factor
+++ b/basis/io/encodings/gb18030/gb18030-tests.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2009 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
-USING: io.encodings.chinese io.encodings.string strings tools.test arrays ;
-IN: io.encodings.chinese.tests
+USING: io.encodings.gb18030 io.encodings.string strings tools.test arrays ;
+IN: io.encodings.gb18030.tests

 [ "hello" ] [ "hello" gb18030 encode >string ] unit-test
 [ "hello" ] [ "hello" gb18030 decode ] unit-test
--- a/basis/io/encodings/gb18030/gb18030.factor
+++ b/basis/io/encodings/gb18030/gb18030.factor
@ -4,7 +4,7 @@ USING: xml xml.data kernel io io.encodings interval-maps splitting fry
 math.parser sequences combinators assocs locals accessors math arrays
 byte-arrays values io.encodings.ascii ascii io.files biassocs
 math.order combinators.short-circuit io.binary io.encodings.iana ;
-IN: io.encodings.chinese
+IN: io.encodings.gb18030

 SINGLETON: gb18030

@ -80,7 +80,7 @@ VALUE: gb>u
 VALUE: u>gb
 VALUE: mapping

-"vocab:io/encodings/chinese/gb-18030-2000.xml"
+"vocab:io/encodings/gb18030/gb-18030-2000.xml"
 ascii <file-reader> xml>gb-data
 [ ranges-u>gb to: u>gb ] [ ranges-gb>u to: gb>u ] bi
 >biassoc to: mapping
--- a/basis/io/encodings/gb18030/summary.txt
+++ b/basis/io/encodings/gb18030/summary.txt
--- a/basis/io/encodings/johab/authors.txt
+++ b/basis/io/encodings/johab/authors.txt
@ -0,0 +1 @@
+Daniel Ehrenberg
--- a/basis/io/encodings/johab/johab-docs.factor
+++ b/basis/io/encodings/johab/johab-docs.factor
@ -0,0 +1,9 @@
+! Copyright (C) 2009 Yun, Jonghyouk.
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.syntax help.markup ;
+IN: io.encodings.johab
+
+ABOUT: johab
+
+HELP: johab
+{ $class-description "Korean Johab encoding (KSC5601-1992). This encoding is not commonly used anymore." } ;
--- a/basis/io/encodings/johab/johab-tests.factor
+++ b/basis/io/encodings/johab/johab-tests.factor
@ -0,0 +1,36 @@
+! Copyright (C) 2009 Yun, Jonghyouk.
+! See http://factorcode.org/license.txt for BSD license.
+USING: arrays byte-arrays io io.encodings io.encodings.johab assocs
+io.encodings.string io.streams.string io.encodings.euc.private words
+kernel locals multiline namespaces sequences strings tools.test ;
+IN: io.encodings.johab.tests
+
+: johab>unicode ( ch -- ch/f )
+    johab euc-table word-prop at ;
+
+: unicode>johab ( ch -- ch/f )
+    johab euc-table word-prop value-at ;
+
+! johab encodings
+[ HEX: 20 ] [ HEX: 20 johab>unicode ] unit-test
+[ HEX: 3133 ] [ HEX: 8444 johab>unicode ] unit-test
+[ HEX: 8A5D ] [ HEX: AD4F unicode>johab ] unit-test
+
+: phrase-unicode ( -- s )
+    "\u00b3d9\u00d574\u00bb3c\u00acfc \u00bc31\u00b450\u00c0b0\u00c774!" ;
+
+: phrase-johab ( -- s )
+    B{
+        149 183 208 129 162 137 137 193 32 164 130 150 129 172 101
+        183 161 33
+    } ;
+
+: phrase-johab>unicode ( -- s )
+    phrase-johab johab decode ;
+
+: phrase-unicode>johab ( -- s )
+    phrase-unicode johab encode ;
+
+[ t ] [ phrase-johab>unicode phrase-unicode = ] unit-test
+[ t ] [ phrase-unicode>johab phrase-johab = ] unit-test
+
--- a/basis/io/encodings/johab/johab.factor
+++ b/basis/io/encodings/johab/johab.factor
@ -0,0 +1,7 @@
+! Copyright (C) 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USE: io.encodings.euc
+IN: io.encodings.johab
+
+EUC: johab "vocab:io/encodings/johab/johab.txt" 
+
--- a/basis/io/encodings/johab/johab.txt
+++ b/basis/io/encodings/johab/johab.txt
--- a/basis/io/encodings/johab/summary.txt
+++ b/basis/io/encodings/johab/summary.txt
@ -0,0 +1 @@
+Johab Korean text encoding
--- a/basis/io/encodings/korean/korean-tests.factor
+++ b/basis/io/encodings/korean/korean-tests.factor
@ -1,46 +0,0 @@
-! Copyright (C) 2009 Yun, Jonghyouk.
-! See http://factorcode.org/license.txt for BSD license.
-USING: arrays byte-arrays io io.encodings io.encodings.korean
-io.encodings.korean.private io.encodings.string io.streams.string
-kernel locals multiline namespaces sequences strings tools.test ;
-IN: io.encodings.korean.tests
-
-! convert cp949 <> unicode
-
-[ f ] [ HEX: 80 cp949>unicode ] unit-test
-[ f ] [ HEX: ff cp949>unicode ] unit-test
-[ HEX: ac02 ] [ HEX: 8141 cp949>unicode ] unit-test
-[ HEX: 7f ] [ HEX: 7f cp949>unicode ] unit-test
-[ HEX: c724 ] [ HEX: c0b1 cp949>unicode ] unit-test
-
-[ HEX: 8141 ] [ HEX: ac02 unicode>cp949 ] unit-test
-[ HEX: 7f ] [ HEX: 7f unicode>cp949 ] unit-test
-[ HEX: c0b1 ] [ HEX: c724 unicode>cp949 ] unit-test
-
-: phrase-unicode ( -- s )
-    "\u00b3d9\u00d574\u00bb3c\u00acfc \u00bc31\u00b450\u00c0b0\u00c774!" ;
-
-: phrase-cp949 ( -- s )
-    {
-        HEX: b5 HEX: bf HEX: c7 HEX: d8
-        HEX: b9 HEX: b0 HEX: b0 HEX: fa
-        HEX: 20 HEX: b9 HEX: e9 HEX: b5
-        HEX: ce HEX: bb HEX: ea HEX: c0
-        HEX: cc HEX: 21
-    } ;
-
-: phrase-unicode>cp949 ( -- s )
-    phrase-unicode cp949 encode ;
-
-: phrase-cp949>unicode ( -- s )
-    phrase-cp949 cp949 decode ;
-
-[ t ] [ phrase-unicode>cp949 >array phrase-cp949 = ] unit-test
-
-[ t ]  [ phrase-cp949>unicode phrase-unicode = ] unit-test
-
-[ t ] [ phrase-cp949 1 head* cp949 decode phrase-unicode 1 head* = ] unit-test
-
-[ t ] [ phrase-cp949 3 head* cp949 decode phrase-unicode 2 head* = ] unit-test
-
-[ t ] [ phrase-cp949 2 head* cp949 decode phrase-unicode 2 head* CHAR: replacement-character suffix = ] unit-test
--- a/basis/io/encodings/korean/korean.factor
+++ b/basis/io/encodings/korean/korean.factor
@ -1,81 +0,0 @@
-! Copyright (C) 2009 Yun, Jonghyouk.
-! See http://factorcode.org/license.txt for BSD license.
-USING: assocs byte-arrays combinators io io.encodings
-io.encodings.ascii io.encodings.iana io.files kernel locals math
-math.order math.parser memoize multiline sequences splitting
-values hashtables io.binary ;
-IN: io.encodings.korean
-
-! TODO: migrate to common code-table parser (by Dan).
-
-SINGLETON: cp949
-
-cp949 "EUC-KR" register-encoding
-
-<PRIVATE
-
-! parse cp949.txt > table
-
-: cp949.txt-lines ( -- seq )
-    ! "cp949.txt" from ...
-    ! <http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT>
-    "vocab:io/encodings/korean/data/cp949.txt"
-    ascii file-lines ;
-
-: drop-comments ( seq -- newseq )
-    [ "#" split1 drop ] map harvest ;
-
-: split-column ( line -- columns )
-    "\t" split 2 head ;
-
-: parse-hex ( s -- n )
-    2 short tail hex> ;
-
-: parse-line ( line -- code-unicode )
-    split-column [ parse-hex ] map ;
-
-: process-codetable-lines ( lines -- assoc )
-    drop-comments [ parse-line ] map ; 
-
-! convert cp949 <> unicode
-
-MEMO: cp949>unicode-table ( -- hashtable )
-    cp949.txt-lines process-codetable-lines >hashtable ;
-
-MEMO: unicode>cp949-table ( -- hashtable )
-    cp949>unicode-table [ swap ] assoc-map ;
-
-unicode>cp949-table drop
-
-: cp949>unicode ( b -- u )
-    cp949>unicode-table at ;
-
-: unicode>cp949 ( u -- b )
-    unicode>cp949-table at ;
-
-: cp949-1st? ( n -- ? )
-    dup [ HEX: 81 HEX: fe between? ] when ;
-
-: byte? ( n -- ? )
-    0 HEX: ff between? ;
-
-M:: cp949 encode-char ( char stream encoding -- )
-    char unicode>cp949 byte?
-    [ char 1byte-array stream stream-write ] [
-        char unicode>cp949
-        h>b/b swap 2byte-array
-        stream stream-write
-    ] if ;
-
-: decode-char-step2 ( c stream -- char )
-    stream-read1
-    [ 2byte-array be> cp949>unicode ]
-    [ drop replacement-char ] if* ;
-
-M:: cp949 decode-char ( stream encoding -- char/f )
-    stream stream-read1
-    {
-        { [ dup not ] [ drop f ] }
-        { [ dup cp949-1st? ] [ stream decode-char-step2 ] }
-        [ ]
-    } cond ;
--- a/basis/io/encodings/shift-jis/CP932.txt
+++ b/basis/io/encodings/shift-jis/CP932.txt
--- a/basis/io/encodings/shift-jis/authors.txt
+++ b/basis/io/encodings/shift-jis/authors.txt
@ -0,0 +1 @@
+Daniel Ehrenberg
--- a/basis/io/encodings/shift-jis/euc-0201.txt
+++ b/basis/io/encodings/shift-jis/euc-0201.txt
@ -0,0 +1,208 @@
+#
+#	Name:             JIS X 0201 (1976) to Unicode 1.1 Table
+#	Unicode version:  1.1
+#	Table version:    0.9
+#	Table format:     Format A
+#	Date:             8 March 1994
+#
+#	Copyright (c) 1991-1994 Unicode, Inc.  All Rights reserved.
+#
+#	This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
+#	No claims are made as to fitness for any particular purpose.  No
+#	warranties of any kind are expressed or implied.  The recipient
+#	agrees to determine applicability of information provided.  If this
+#	file has been provided on magnetic media by Unicode, Inc., the sole
+#	remedy for any claim will be exchange of defective media within 90
+#	days of receipt.
+#
+#	Recipient is granted the right to make copies in any form for
+#	internal distribution and to freely use the information supplied
+#	in the creation of products supporting Unicode.  Unicode, Inc.
+#	specifically excludes the right to re-distribute this file directly
+#	to third parties or other organizations whether for profit or not.
+#
+#	General notes:
+#
+#
+# This table contains one set of mappings from JIS X 0201 into Unicode.
+# Note that these data are *possible* mappings only and may not be the
+# same as those used by actual products, nor may they be the best suited
+# for all uses.  For more information on the mappings between various code
+# pages incorporating the repertoire of JIS X 0201 and Unicode, consult the
+# VENDORS mapping data.  Normative information on the mapping between
+# JIS X 0201 and Unicode may be found in the Unihan.txt file in the
+# latest Unicode Character Database.
+#
+# If you have carefully considered the fact that the mappings in
+# this table are only one possible set of mappings between JIS X 0201 and
+# Unicode and have no normative status, but still feel that you
+# have located an error in the table that requires fixing, you may
+# report any such error to errata@unicode.org.
+#
+#
+#	Format:  Three tab-separated columns
+#		Column #1 is the shift JIS code (in hex as 0xXX)
+#		Column #2 is the Unicode (in hex as 0xXXXX)
+#		Column #3 the Unicode (ISO 10646) name (follows a comment sign)
+#
+#	The entries are in JIS order
+#
+#
+0x20	0x0020	# SPACE
+0x21	0x0021	# EXCLAMATION MARK
+0x22	0x0022	# QUOTATION MARK
+0x23	0x0023	# NUMBER SIGN
+0x24	0x0024	# DOLLAR SIGN
+0x25	0x0025	# PERCENT SIGN
+0x26	0x0026	# AMPERSAND
+0x27	0x0027	# APOSTROPHE
+0x28	0x0028	# LEFT PARENTHESIS
+0x29	0x0029	# RIGHT PARENTHESIS
+0x2A	0x002A	# ASTERISK
+0x2B	0x002B	# PLUS SIGN
+0x2C	0x002C	# COMMA
+0x2D	0x002D	# HYPHEN-MINUS
+0x2E	0x002E	# FULL STOP
+0x2F	0x002F	# SOLIDUS
+0x30	0x0030	# DIGIT ZERO
+0x31	0x0031	# DIGIT ONE
+0x32	0x0032	# DIGIT TWO
+0x33	0x0033	# DIGIT THREE
+0x34	0x0034	# DIGIT FOUR
+0x35	0x0035	# DIGIT FIVE
+0x36	0x0036	# DIGIT SIX
+0x37	0x0037	# DIGIT SEVEN
+0x38	0x0038	# DIGIT EIGHT
+0x39	0x0039	# DIGIT NINE
+0x3A	0x003A	# COLON
+0x3B	0x003B	# SEMICOLON
+0x3C	0x003C	# LESS-THAN SIGN
+0x3D	0x003D	# EQUALS SIGN
+0x3E	0x003E	# GREATER-THAN SIGN
+0x3F	0x003F	# QUESTION MARK
+0x40	0x0040	# COMMERCIAL AT
+0x41	0x0041	# LATIN CAPITAL LETTER A
+0x42	0x0042	# LATIN CAPITAL LETTER B
+0x43	0x0043	# LATIN CAPITAL LETTER C
+0x44	0x0044	# LATIN CAPITAL LETTER D
+0x45	0x0045	# LATIN CAPITAL LETTER E
+0x46	0x0046	# LATIN CAPITAL LETTER F
+0x47	0x0047	# LATIN CAPITAL LETTER G
+0x48	0x0048	# LATIN CAPITAL LETTER H
+0x49	0x0049	# LATIN CAPITAL LETTER I
+0x4A	0x004A	# LATIN CAPITAL LETTER J
+0x4B	0x004B	# LATIN CAPITAL LETTER K
+0x4C	0x004C	# LATIN CAPITAL LETTER L
+0x4D	0x004D	# LATIN CAPITAL LETTER M
+0x4E	0x004E	# LATIN CAPITAL LETTER N
+0x4F	0x004F	# LATIN CAPITAL LETTER O
+0x50	0x0050	# LATIN CAPITAL LETTER P
+0x51	0x0051	# LATIN CAPITAL LETTER Q
+0x52	0x0052	# LATIN CAPITAL LETTER R
+0x53	0x0053	# LATIN CAPITAL LETTER S
+0x54	0x0054	# LATIN CAPITAL LETTER T
+0x55	0x0055	# LATIN CAPITAL LETTER U
+0x56	0x0056	# LATIN CAPITAL LETTER V
+0x57	0x0057	# LATIN CAPITAL LETTER W
+0x58	0x0058	# LATIN CAPITAL LETTER X
+0x59	0x0059	# LATIN CAPITAL LETTER Y
+0x5A	0x005A	# LATIN CAPITAL LETTER Z
+0x5B	0x005B	# LEFT SQUARE BRACKET
+0x5C	0x00A5	# YEN SIGN
+0x5D	0x005D	# RIGHT SQUARE BRACKET
+0x5E	0x005E	# CIRCUMFLEX ACCENT
+0x5F	0x005F	# LOW LINE
+0x60	0x0060	# GRAVE ACCENT
+0x61	0x0061	# LATIN SMALL LETTER A
+0x62	0x0062	# LATIN SMALL LETTER B
+0x63	0x0063	# LATIN SMALL LETTER C
+0x64	0x0064	# LATIN SMALL LETTER D
+0x65	0x0065	# LATIN SMALL LETTER E
+0x66	0x0066	# LATIN SMALL LETTER F
+0x67	0x0067	# LATIN SMALL LETTER G
+0x68	0x0068	# LATIN SMALL LETTER H
+0x69	0x0069	# LATIN SMALL LETTER I
+0x6A	0x006A	# LATIN SMALL LETTER J
+0x6B	0x006B	# LATIN SMALL LETTER K
+0x6C	0x006C	# LATIN SMALL LETTER L
+0x6D	0x006D	# LATIN SMALL LETTER M
+0x6E	0x006E	# LATIN SMALL LETTER N
+0x6F	0x006F	# LATIN SMALL LETTER O
+0x70	0x0070	# LATIN SMALL LETTER P
+0x71	0x0071	# LATIN SMALL LETTER Q
+0x72	0x0072	# LATIN SMALL LETTER R
+0x73	0x0073	# LATIN SMALL LETTER S
+0x74	0x0074	# LATIN SMALL LETTER T
+0x75	0x0075	# LATIN SMALL LETTER U
+0x76	0x0076	# LATIN SMALL LETTER V
+0x77	0x0077	# LATIN SMALL LETTER W
+0x78	0x0078	# LATIN SMALL LETTER X
+0x79	0x0079	# LATIN SMALL LETTER Y
+0x7A	0x007A	# LATIN SMALL LETTER Z
+0x7B	0x007B	# LEFT CURLY BRACKET
+0x7C	0x007C	# VERTICAL LINE
+0x7D	0x007D	# RIGHT CURLY BRACKET
+0x7E	0x203E	# OVERLINE
+0xA1	0xFF61	# HALFWIDTH IDEOGRAPHIC FULL STOP
+0xA2	0xFF62	# HALFWIDTH LEFT CORNER BRACKET
+0xA3	0xFF63	# HALFWIDTH RIGHT CORNER BRACKET
+0xA4	0xFF64	# HALFWIDTH IDEOGRAPHIC COMMA
+0xA5	0xFF65	# HALFWIDTH KATAKANA MIDDLE DOT
+0xA6	0xFF66	# HALFWIDTH KATAKANA LETTER WO
+0xA7	0xFF67	# HALFWIDTH KATAKANA LETTER SMALL A
+0xA8	0xFF68	# HALFWIDTH KATAKANA LETTER SMALL I
+0xA9	0xFF69	# HALFWIDTH KATAKANA LETTER SMALL U
+0xAA	0xFF6A	# HALFWIDTH KATAKANA LETTER SMALL E
+0xAB	0xFF6B	# HALFWIDTH KATAKANA LETTER SMALL O
+0xAC	0xFF6C	# HALFWIDTH KATAKANA LETTER SMALL YA
+0xAD	0xFF6D	# HALFWIDTH KATAKANA LETTER SMALL YU
+0xAE	0xFF6E	# HALFWIDTH KATAKANA LETTER SMALL YO
+0xAF	0xFF6F	# HALFWIDTH KATAKANA LETTER SMALL TU
+0xB0	0xFF70	# HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+0xB1	0xFF71	# HALFWIDTH KATAKANA LETTER A
+0xB2	0xFF72	# HALFWIDTH KATAKANA LETTER I
+0xB3	0xFF73	# HALFWIDTH KATAKANA LETTER U
+0xB4	0xFF74	# HALFWIDTH KATAKANA LETTER E
+0xB5	0xFF75	# HALFWIDTH KATAKANA LETTER O
+0xB6	0xFF76	# HALFWIDTH KATAKANA LETTER KA
+0xB7	0xFF77	# HALFWIDTH KATAKANA LETTER KI
+0xB8	0xFF78	# HALFWIDTH KATAKANA LETTER KU
+0xB9	0xFF79	# HALFWIDTH KATAKANA LETTER KE
+0xBA	0xFF7A	# HALFWIDTH KATAKANA LETTER KO
+0xBB	0xFF7B	# HALFWIDTH KATAKANA LETTER SA
+0xBC	0xFF7C	# HALFWIDTH KATAKANA LETTER SI
+0xBD	0xFF7D	# HALFWIDTH KATAKANA LETTER SU
+0xBE	0xFF7E	# HALFWIDTH KATAKANA LETTER SE
+0xBF	0xFF7F	# HALFWIDTH KATAKANA LETTER SO
+0xC0	0xFF80	# HALFWIDTH KATAKANA LETTER TA
+0xC1	0xFF81	# HALFWIDTH KATAKANA LETTER TI
+0xC2	0xFF82	# HALFWIDTH KATAKANA LETTER TU
+0xC3	0xFF83	# HALFWIDTH KATAKANA LETTER TE
+0xC4	0xFF84	# HALFWIDTH KATAKANA LETTER TO
+0xC5	0xFF85	# HALFWIDTH KATAKANA LETTER NA
+0xC6	0xFF86	# HALFWIDTH KATAKANA LETTER NI
+0xC7	0xFF87	# HALFWIDTH KATAKANA LETTER NU
+0xC8	0xFF88	# HALFWIDTH KATAKANA LETTER NE
+0xC9	0xFF89	# HALFWIDTH KATAKANA LETTER NO
+0xCA	0xFF8A	# HALFWIDTH KATAKANA LETTER HA
+0xCB	0xFF8B	# HALFWIDTH KATAKANA LETTER HI
+0xCC	0xFF8C	# HALFWIDTH KATAKANA LETTER HU
+0xCD	0xFF8D	# HALFWIDTH KATAKANA LETTER HE
+0xCE	0xFF8E	# HALFWIDTH KATAKANA LETTER HO
+0xCF	0xFF8F	# HALFWIDTH KATAKANA LETTER MA
+0xD0	0xFF90	# HALFWIDTH KATAKANA LETTER MI
+0xD1	0xFF91	# HALFWIDTH KATAKANA LETTER MU
+0xD2	0xFF92	# HALFWIDTH KATAKANA LETTER ME
+0xD3	0xFF93	# HALFWIDTH KATAKANA LETTER MO
+0xD4	0xFF94	# HALFWIDTH KATAKANA LETTER YA
+0xD5	0xFF95	# HALFWIDTH KATAKANA LETTER YU
+0xD6	0xFF96	# HALFWIDTH KATAKANA LETTER YO
+0xD7	0xFF97	# HALFWIDTH KATAKANA LETTER RA
+0xD8	0xFF98	# HALFWIDTH KATAKANA LETTER RI
+0xD9	0xFF99	# HALFWIDTH KATAKANA LETTER RU
+0xDA	0xFF9A	# HALFWIDTH KATAKANA LETTER RE
+0xDB	0xFF9B	# HALFWIDTH KATAKANA LETTER RO
+0xDC	0xFF9C	# HALFWIDTH KATAKANA LETTER WA
+0xDD	0xFF9D	# HALFWIDTH KATAKANA LETTER N
+0xDE	0xFF9E	# HALFWIDTH KATAKANA VOICED SOUND MARK
+0xDF	0xFF9F	# HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
--- a/basis/io/encodings/shift-jis/euc-0208.txt
+++ b/basis/io/encodings/shift-jis/euc-0208.txt
--- a/basis/io/encodings/shift-jis/euc-0212.txt
+++ b/basis/io/encodings/shift-jis/euc-0212.txt
--- a/basis/io/encodings/shift-jis/shift-jis-docs.factor
+++ b/basis/io/encodings/shift-jis/shift-jis-docs.factor
@ -1,14 +1,14 @@
 ! Copyright (C) 2009 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
 USING: help.markup help.syntax ;
-IN: io.encodings.japanese
+IN: io.encodings.shift-jis

-ARTICLE: "io.encodings.japanese" "Japanese text encodings"
-"Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete."
+ARTICLE: "io.encodings.shift-jis" "Shift JIS"
+"Shift JIS is a text encoding for Japanese. There are multiple versions, depending on whether the offical standard or the modified Microsoft version is required."
 { $subsection shift-jis }
 { $subsection windows-31j } ;

-ABOUT: "io.encodings.japanese"
+ABOUT: "io.encodings.shift-jis"

 HELP: windows-31j
 { $class-description "The encoding descriptor Windows-31J, which is sometimes informally called Shift JIS. This is based on Code Page 932." }
--- a/basis/io/encodings/shift-jis/shift-jis-tests.factor
+++ b/basis/io/encodings/shift-jis/shift-jis-tests.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2009 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
-USING: io.encodings.japanese tools.test io.encodings.string arrays strings ;
-IN: io.encodings.japanese.tests
+USING: io.encodings.shift-jis tools.test io.encodings.string arrays strings ;
+IN: io.encodings.shift-jis.tests

 [ { CHAR: replacement-character } ] [ { 141 } shift-jis decode >array ] unit-test
 [ "" ] [ "" shift-jis decode >string ] unit-test
--- a/basis/io/encodings/shift-jis/shift-jis.factor
+++ b/basis/io/encodings/shift-jis/shift-jis.factor
@ -3,8 +3,9 @@
 USING: sequences kernel io io.files combinators.short-circuit
 math.order values assocs io.encodings io.binary fry strings math
 io.encodings.ascii arrays byte-arrays accessors splitting
-math.parser biassocs io.encodings.iana ;
-IN: io.encodings.japanese
+math.parser biassocs io.encodings.iana
+locals multiline combinators simple-flat-file ;
+IN: io.encodings.shift-jis

 SINGLETON: shift-jis

@ -28,26 +29,16 @@ M: windows-31j <decoder> drop windows-31j-table <decoder> ;

 TUPLE: jis assoc ;

-: <jis> ( assoc -- jis )
-    [ nip ] assoc-filter
-    >biassoc jis boa ;
-
 : ch>jis ( ch tuple -- jis ) assoc>> value-at [ encode-error ] unless* ;
 : jis>ch ( jis tuple -- string ) assoc>> at replacement-char or ;

-: process-jis ( lines -- assoc )
-    [ "#" split1 drop ] map harvest [
-        "\t" split 2 head
-        [ 2 short tail hex> ] map
-    ] map ;
-
 : make-jis ( filename -- jis )
-    ascii file-lines process-jis <jis> ;
+    flat-file>biassoc [ nip ] assoc-filter jis boa ;

-"vocab:io/encodings/japanese/CP932.txt"
+"vocab:io/encodings/shift-jis/CP932.txt"
 make-jis to: windows-31j-table

-"vocab:io/encodings/japanese/sjis-0208-1997-std.txt"
+"vocab:io/encodings/shift-jis/sjis-0208-1997-std.txt"
 make-jis to: shift-jis-table

 : small? ( char -- ? )
@ -71,5 +62,3 @@ M: jis decode-char
            [ 2drop replacement-char ] if*
        ] if
    ] [ 2drop f ] if* ;
-
-PRIVATE>
--- a/basis/io/encodings/shift-jis/sjis-0208-1997-std.txt
+++ b/basis/io/encodings/shift-jis/sjis-0208-1997-std.txt
--- a/basis/io/encodings/shift-jis/summary.txt
+++ b/basis/io/encodings/shift-jis/summary.txt
--- a/basis/io/encodings/utf16n/authors.txt
+++ b/basis/io/encodings/utf16n/authors.txt
@ -0,0 +1 @@
+Daniel Ehrenberg
--- a/basis/simple-flat-file/simple-flat-file-docs.factor
+++ b/basis/simple-flat-file/simple-flat-file-docs.factor
@ -0,0 +1,8 @@
+USING: help.syntax help.markup strings ;
+IN: simple-flat-file
+
+ABOUT: "simple-flat-file"
+
+ARTICLE: "simple-flat-file" "Parsing simple flat files"
+"The " { $vocab-link "simple-flat-file" } " vocabulary provides words for loading and parsing simple flat files in a particular format which is common for encoding tasks."
+{ $subsection flat-file>biassoc } ;
--- a/basis/simple-flat-file/simple-flat-file-tests.factor
+++ b/basis/simple-flat-file/simple-flat-file-tests.factor
@ -0,0 +1,23 @@
+! Copyright (C) 2009 Yun, Jonghyouk.
+! See http://factorcode.org/license.txt for BSD license.
+USING: simple-flat-file tools.test memoize ;
+IN: simple-flat-file.tests
+
+
+MEMO: <test1> ( -- code-table )
+    "vocab:simple-flat-file/test1.txt" flat-file>biassoc ;
+
+
+[ 0 ] [ 0 <test1> at ] unit-test
+[ 0 ] [ 0 <test1> value-at ] unit-test
+
+[ 3 ] [ 3 <test1> at ] unit-test
+[ 3 ] [ 3 <test1> value-at ] unit-test
+
+[ HEX: AD2A ] [ HEX: 8253 <test1> at ] unit-test
+[ HEX: 8253 ] [ HEX: AD2A <test1> value-at ] unit-test
+
+[ HEX: AD31 ] [ HEX: 8258 <test1> at ] unit-test
+[ HEX: 8258 ] [ HEX: AD31 <test1> value-at ] unit-test
+
+
--- a/basis/simple-flat-file/simple-flat-file.factor
+++ b/basis/simple-flat-file/simple-flat-file.factor
@ -0,0 +1,23 @@
+! Copyright (C) 2009 Daniel Ehrenberg
+! See http://factorcode.org/license.txt for BSD license.
+USING: sequences splitting kernel math.parser io.files io.encodings.ascii biassocs ;
+IN: simple-flat-file
+
+: drop-comments ( seq -- newseq )
+    [ "#" split1 drop ] map harvest ;
+
+: split-column ( line -- columns )
+    "\t" split 2 head ;
+
+: parse-hex ( s -- n )
+    2 short tail hex> ;
+
+: parse-line ( line -- code-unicode )
+    split-column [ parse-hex ] map ;
+
+: process-codetable-lines ( lines -- assoc )
+    drop-comments [ parse-line ] map ; 
+
+: flat-file>biassoc ( filename -- biassoc )
+    ascii file-lines process-codetable-lines >biassoc ;
+
--- a/basis/xmode/utilities/utilities-tests.factor
+++ b/basis/xmode/utilities/utilities-tests.factor
@ -2,13 +2,6 @@ IN: xmode.utilities.tests
 USING: accessors xmode.utilities tools.test xml xml.data kernel
 strings vectors sequences io.files prettyprint assocs
 unicode.case ;
-[ "hi" 3 ] [
-    { 1 2 3 4 5 6 7 8 } [ H{ { 3 "hi" } } at ] map-find
-] unit-test
-
-[ f f ] [
-    { 1 2 3 4 5 6 7 8 } [ H{ { 11 "hi" } } at ] map-find
-] unit-test

 TUPLE: company employees type ;

--- a/basis/xmode/utilities/utilities.factor
+++ b/basis/xmode/utilities/utilities.factor
@ -6,11 +6,6 @@ IN: xmode.utilities

 : child-tags ( tag -- seq ) children>> [ tag? ] filter ;

-: map-find ( seq quot -- result elt )
-    [ f ] 2dip
-    '[ nip @ dup ] find
-    [ [ drop f ] unless ] dip ; inline
-
 : tag-init-form ( spec -- quot )
    {
        { [ dup quotation? ] [ [ object get tag get ] prepose ] }
--- a/core/io/streams/sequence/sequence.factor
+++ b/core/io/streams/sequence/sequence.factor
@ -10,14 +10,14 @@ SLOT: i
    [ i>> ] [ underlying>> ] bi ; inline

 : next ( stream -- )
-    [ 1+ ] change-i drop ;
+    [ 1+ ] change-i drop ; inline

 : sequence-read1 ( stream -- elt/f )
    [ >sequence-stream< ?nth ]
    [ next ] bi ; inline

 : add-length ( n stream -- i+n )
-    [ i>> + ] [ underlying>> length ] bi min  ;
+    [ i>> + ] [ underlying>> length ] bi min  ; inline

 : (sequence-read) ( n stream -- seq/f )
    [ add-length ] keep
--- a/core/sequences/sequences-docs.factor
+++ b/core/sequences/sequences-docs.factor
@ -397,6 +397,10 @@ HELP: find-last-from
 { $values { "n" "a starting index" } { "seq" sequence } { "quot" { $quotation "( elt -- ? )" } } { "i" "the index of the first match, or f" } { "elt" "the first matching element, or " { $link f } } }
 { $description "Applies the quotation to each element of the sequence in reverse order, until it outputs a true value or the start of the sequence is reached. If the quotation yields a true value for some sequence element, the word outputs the element index and the element itself. Otherwise, the word outputs an index of f and " { $link f } " as the element." } ;

+HELP: map-find
+{ $values { "seq" sequence } { "quot" { $quotation "( elt -- result/f )" } } { "result" "the first non-false result of the quotation" } { "elt" "the first matching element, or " { $link f } } }
+{ $description "Applies the quotation to each element of the sequence, until the quotation outputs a true value. If the quotation ever yields a result which is not " { $link f } ", then the value is output, along with the element of the sequence which yielded this." } ;
+
 HELP: any?
 { $values { "seq" sequence } { "quot" { $quotation "( elt -- ? )" } } { "?" "a boolean" } }
 { $description "Tests if the sequence contains an element satisfying the predicate, by applying the predicate to each element in turn until a true value is found. If the sequence is empty or if the end of the sequence is reached, outputs " { $link f } "." } ;
@ -1455,6 +1459,7 @@ ARTICLE: "sequences-combinators" "Sequence combinators"
 { $subsection map }
 { $subsection map-as }
 { $subsection map-index }
+{ $subsection map-reduce }
 { $subsection accumulate }
 { $subsection produce }
 { $subsection produce-as }
@ -1473,6 +1478,7 @@ ARTICLE: "sequence-2combinators" "Pair-wise sequence combinators"
 { $subsection 2reduce }
 { $subsection 2map }
 { $subsection 2map-as }
+{ $subsection 2map-reduce }
 { $subsection 2all? } ;

 ARTICLE: "sequence-3combinators" "Triple-wise sequence combinators"
@ -1507,7 +1513,8 @@ ARTICLE: "sequences-search" "Searching sequences"
 { $subsection find }
 { $subsection find-from }
 { $subsection find-last }
-{ $subsection find-last-from } ;
+{ $subsection find-last-from }
+{ $subsection map-find } ;

 ARTICLE: "sequences-trimming" "Trimming sequences"
 "Trimming words:"
--- a/core/sequences/sequences-tests.factor
+++ b/core/sequences/sequences-tests.factor
@ -1,5 +1,5 @@
 USING: arrays kernel math namespaces sequences kernel.private
-sequences.private strings sbufs tools.test vectors
+sequences.private strings sbufs tools.test vectors assocs
 generic vocabs.loader ;
 IN: sequences.tests

@ -274,3 +274,11 @@ M: bogus-hashcode hashcode* 2drop 0 >bignum ;
 [ "asdf" iota ] must-fail
 [ T{ iota { n 10 } } ] [ 10 iota ] unit-test
 [ 0 ] [ 10 iota first ] unit-test
+
+[ "hi" 3 ] [
+    { 1 2 3 4 5 6 7 8 } [ H{ { 3 "hi" } } at ] map-find
+] unit-test
+
+[ f f ] [
+    { 1 2 3 4 5 6 7 8 } [ H{ { 11 "hi" } } at ] map-find
+] unit-test
--- a/core/sequences/sequences.factor
+++ b/core/sequences/sequences.factor
@ -815,6 +815,11 @@ PRIVATE>
    [ [ 2unclip-slice ] dip [ call ] keep ] dip
    compose 2reduce ; inline

+: map-find ( seq quot -- result elt )
+    [ f ] 2dip
+    [ [ nip ] dip call dup ] curry find
+    [ [ drop f ] unless ] dip ; inline
+
 : unclip-last-slice ( seq -- butlast-slice last )
    [ but-last-slice ] [ peek ] bi ; inline

--- a/extra/id3/id3.factor
+++ b/extra/id3/id3.factor
@ -5,7 +5,8 @@ strings kernel math io.mmap io.mmap.uchar accessors syntax
 combinators math.ranges unicode.categories byte-arrays
 io.encodings.string io.encodings.utf16 assocs math.parser
 combinators.short-circuit fry namespaces combinators.smart
-splitting io.encodings.ascii arrays ;
+splitting io.encodings.ascii arrays io.files.info unicode.case
+io.directories.search ;
 IN: id3

 <PRIVATE
@ -179,7 +180,7 @@ PRIVATE>

 : id3-frame ( id3 key -- value/f ) [ ] frame-named ; inline

-: file-id3-tags ( path -- id3v2-info/f )
+: (file-id3-tags) ( path -- id3v2-info/f )
    [
        {
            { [ dup id3v2? ] [ read-v2-tag-data ] }
@ -187,3 +188,10 @@ PRIVATE>
            [ drop f ]
        } cond
    ] with-mapped-uchar-file ;
+
+: file-id3-tags ( path -- id3v2-info/f )
+    dup file-info size>> 0 <= [ drop f ] [ (file-id3-tags) ] if ;
+
+: parse-id3s ( path -- seq )
+    [ >lower ".mp3" tail? ] find-all-files
+    [ dup file-id3-tags ] { } map>assoc ;
--- a/extra/images/viewer/viewer.factor
+++ b/extra/images/viewer/viewer.factor
@ -10,7 +10,7 @@ TUPLE: image-gadget < gadget { image image } ;
 M: image-gadget pref-dim*
    image>> dim>> ;

-: draw-image ( tiff -- )
+: draw-image ( image -- )
    0 0 glRasterPos2i 1.0 -1.0 glPixelZoom
    [ dim>> first2 GL_RGBA GL_UNSIGNED_BYTE ]
    [ bitmap>> ] bi glDrawPixels ;