From 0f01c81cfb4e31fa60c42f873d2e7e734e0f6866 Mon Sep 17 00:00:00 2001 From: Sascha Matzke Date: Fri, 18 Jun 2010 12:51:35 +0200 Subject: [PATCH] fixed utf8 handling in bson --- extra/bson/bson-tests.factor | 4 ++-- extra/bson/reader/reader.factor | 6 ++++-- extra/bson/writer/writer.factor | 12 ++++++++++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/extra/bson/bson-tests.factor b/extra/bson/bson-tests.factor index 7353a9a831..5540cb2ef5 100644 --- a/extra/bson/bson-tests.factor +++ b/extra/bson/bson-tests.factor @@ -8,8 +8,8 @@ IN: bson.tests [ H{ { "a" "a string" } } ] [ H{ { "a" "a string" } } turnaround ] unit-test -[ H{ { "a" "a string" } { "b" H{ { "a" "a string" } } } } ] -[ H{ { "a" "a string" } { "b" H{ { "a" "a string" } } } } turnaround ] unit-test +[ H{ { "a" "a string" } { "b" H{ { "a" "アップルからの最新のニュースや情報を読む" } } } } ] +[ H{ { "a" "a string" } { "b" H{ { "a" "アップルからの最新のニュースや情報を読む" } } } } turnaround ] unit-test [ H{ { "a list" { 1 2.234 "hello world" } } } ] [ H{ { "a list" { 1 2.234 "hello world" } } } turnaround ] unit-test diff --git a/extra/bson/reader/reader.factor b/extra/bson/reader/reader.factor index a007431e4a..f1f3ab8508 100644 --- a/extra/bson/reader/reader.factor +++ b/extra/bson/reader/reader.factor @@ -2,6 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. 
USING: accessors assocs bson.constants calendar combinators combinators.short-circuit io io.binary kernel math locals +io.encodings.utf8 io.encodings namespaces sequences serialize strings vectors byte-arrays ; FROM: io.encodings.binary => binary ; @@ -34,10 +35,11 @@ DEFER: read-elements read-byte-raw first ; inline : read-cstring ( -- string ) - "\0" read-until drop >string ; inline + input-stream get utf8 <decoder> + "\0" swap stream-read-until drop ; inline : read-sized-string ( length -- string ) - read 1 head-slice* >string ; inline + read binary [ read-cstring ] with-byte-reader ; inline : read-timestamp ( -- timestamp ) 8 read [ 4 head signed-le> ] [ 4 tail signed-le> ] bi ; diff --git a/extra/bson/writer/writer.factor b/extra/bson/writer/writer.factor index c711451634..e02b2c6da2 100644 --- a/extra/bson/writer/writer.factor +++ b/extra/bson/writer/writer.factor @@ -2,6 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays assocs bson.constants byte-arrays calendar combinators.short-circuit fry hashtables io io.binary +io.encodings.utf8 io.encodings io.streams.byte-array kernel linked-assocs literals math math.parser namespaces byte-vectors quotations sequences serialize strings vectors dlists alien.accessors ; FROM: words => word? 
word ; @@ -42,8 +43,11 @@ TYPED: write-int32 ( int: integer -- ) INT32-SIZE (>le) ; inline TYPED: write-double ( real: float -- ) double>bits INT64-SIZE (>le) ; inline +TYPED: write-utf8-string ( string: string -- ) + output-stream get utf8 <encoder> stream-write ; inline + TYPED: write-cstring ( string: string -- ) - get-output [ length ] [ ] bi copy 0 write1 ; inline + write-utf8-string 0 write1 ; inline : write-longlong ( object -- ) INT64-SIZE (>le) ; inline @@ -94,8 +98,12 @@ TYPED: (serialize-code) ( code: code -- ) [ length write-int32 ] [ T_Binary_Custom write1 write ] bi ; inline +: write-string-length ( string -- ) + [ length>> 1 + ] + [ aux>> [ length ] [ 0 ] if* ] bi + write-int32 ; inline + TYPED: write-string ( string: string -- ) - '[ _ write-cstring ] with-length-prefix-excl ; inline + dup write-string-length write-cstring ; inline TYPED: write-boolean ( bool: boolean -- ) [ 1 write1 ] [ 0 write1 ] if ; inline