From 01e9a5cb1ac984e99dd68ac534ca8ab9403c766a Mon Sep 17 00:00:00 2001
From: Daniel Ehrenberg <ehrenbed@carleton.edu>
Date: Wed, 13 Feb 2008 17:53:10 -0600
Subject: [PATCH] utf8 and utf16le streams

---
 core/io/encodings/encodings.factor   | 20 +++++++++++------
 core/io/encodings/utf16/utf16.factor | 32 ++++++++++++++++++++++++++++
 core/io/encodings/utf8/utf8.factor   |  4 +++-
 3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/core/io/encodings/encodings.factor b/core/io/encodings/encodings.factor
index dcc055f941..b27b89642d 100755
--- a/core/io/encodings/encodings.factor
+++ b/core/io/encodings/encodings.factor
@@ -36,14 +36,22 @@ SYMBOL: begin
 
 : full? ( resizable -- ? ) space zero? ;
 
-: decode-part-loop ( buf ch state stream quot -- string )
-    >r >r pick r> r> rot full?
-    [ 2drop 2drop >string ]
-    [ [ >r stream-read1 -rot r> call ] 2keep decode-part-loop ] if ; inline
+: end-read-loop ( buf ch state stream quot -- string/f )
+    2drop 2drop >string f like ;
 
-: decode-part ( length stream quot -- string )
+: under ( a b c -- c a b c )
+    tuck >r swapd r> ; inline
+
+: decode-read-loop ( buf ch state stream quot -- string/f )
+    >r >r pick r> r> rot full?  [ end-read-loop ] [
+        over stream-read1 [
+            -rot tuck >r >r >r -rot r> call r> r> decode-read-loop
+        ] [ end-read-loop ] if*
+    ] if ; inline
+
+: decode-read ( length stream quot -- string )
     >r swap start-decoding r>
-    decode-part-loop ; inline
+    decode-read-loop ; inline
 
 GENERIC: init-decoding ( stream encoding -- decoded-stream )
 
diff --git a/core/io/encodings/utf16/utf16.factor b/core/io/encodings/utf16/utf16.factor
index ccf76649e2..81c982dd55 100755
--- a/core/io/encodings/utf16/utf16.factor
+++ b/core/io/encodings/utf16/utf16.factor
@@ -114,3 +114,35 @@ SYMBOL: ignore
         { [ utf16be? ] [ decode-utf16be ] }
         { [ t ] [ decode-error ] }
     } cond ;
+
+! UTF16LE streams
+
+TUPLE: utf16le ;
+: <utf16le> utf16le construct-delegate ;
+! In the future, this should detect and ignore a BOM at the beginning
+
+M: utf16le init-decoding ( stream utf16le -- utf16le-stream )
+    tuck set-delegate ;
+
+M: utf16le init-encoding ( stream utf16le -- utf16le-stream )
+    tuck set-delegate ;
+
+M: utf16le stream-read1 1 swap stream-read ;
+
+M: utf16le stream-read
+    delegate [ decode-utf16le-step ] decode-read ;
+
+M: utf16le stream-read-partial stream-read ;
+
+M: utf16le stream-read-until
+    ! Copied from { c-reader stream-read-until }!!!
+    [ swap read-until-loop ] "" make
+    swap over empty? over not and [ 2drop f f ] when ;
+
+M: utf16le stream-write1
+    >r 1string r> stream-write ;
+
+M: utf16le stream-write
+    >r encode-utf16le r> delegate stream-write ;
+
+M: utf16le dispose delegate dispose ;
diff --git a/core/io/encodings/utf8/utf8.factor b/core/io/encodings/utf8/utf8.factor
index c0fa66e553..de3fd5b67b 100644
--- a/core/io/encodings/utf8/utf8.factor
+++ b/core/io/encodings/utf8/utf8.factor
@@ -90,7 +90,9 @@ M: utf8 init-encoding ( stream utf8 -- utf8-stream )
 M: utf8 stream-read1 1 swap stream-read ;
 
 M: utf8 stream-read
-    [ decode-utf8-step ] decode-part ;
+    delegate [ decode-utf8-step ] decode-read ;
+
+M: utf8 stream-read-partial stream-read ;
 
 M: utf8 stream-read-until
     ! Copied from { c-reader stream-read-until }!!!