diff --git a/basis/io/encodings/utf32/utf32.factor b/basis/io/encodings/utf32/utf32.factor index 68fb6cd2f6..1eaccb3e6b 100644 --- a/basis/io/encodings/utf32/utf32.factor +++ b/basis/io/encodings/utf32/utf32.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: math kernel io.encodings combinators io io.encodings.utf16 -generalizations sequences ; +sequences io.binary ; IN: io.encodings.utf32 SINGLETON: utf32be @@ -12,62 +12,31 @@ SINGLETON: utf32 char ( d c b a -- abcd ) - [ - 24 shift -roll [ - 16 shift -rot [ - 8 shift swap [ - bitor bitor bitor - ] with-replacement - ] with-replacement - ] with-replacement - ] with-replacement ; +: char> ( stream encoding quot -- ch ) + nip swap 4 swap stream-read dup length { + { 0 [ 2drop f ] } + { 4 [ swap call ] } + [ 3drop replacement-char ] + } case ; inline M: utf32be decode-char - drop stream-read4 4spin - [ >char ] [ 3drop f ] if* ; + [ be> ] char> ; M: utf32le decode-char - drop stream-read4 4 npick - [ >char ] [ 2drop 2drop f ] if ; + [ le> ] char> ; ! Encoding -: split-off ( ab -- b a ) - [ HEX: FF bitand ] [ -8 shift ] bi ; - -: char> ( abcd -- d b c a ) - split-off split-off split-off ; - -: stream-write4 ( d c b a stream -- ) - { - [ stream-write1 ] - [ stream-write1 ] - [ stream-write1 ] - [ stream-write1 ] - } cleave ; +: >char ( char stream encoding quot -- ) + nip 4 swap curry dip stream-write ; inline M: utf32be encode-char - drop [ char> ] dip stream-write4 ; + [ >be ] >char ; M: utf32le encode-char - drop [ char> 4spin ] dip stream-write4 ; + [ >le ] >char ; ! UTF-32 diff --git a/core/io/encodings/encodings-docs.factor b/core/io/encodings/encodings-docs.factor index ed39e74878..509757c68a 100644 --- a/core/io/encodings/encodings-docs.factor +++ b/core/io/encodings/encodings-docs.factor @@ -78,6 +78,7 @@ ARTICLE: "encodings-descriptors" "Encoding descriptors" { $subsection "io.encodings.binary" } { $subsection "io.encodings.utf8" } { $subsection "io.encodings.utf16" } +{ $vocab-subsection "UTF-32 encoding" "io.encodings.utf32" } { $vocab-subsection "Strict encodings" "io.encodings.strict" } "Legacy encodings:" { $vocab-subsection "8-bit encodings" "io.encodings.8-bit" }