json.reader: read UTF-16 surrogate pairs

db4
Jon Harper 2015-06-11 00:25:19 +02:00 committed by Doug Coleman
parent 59ba0ef341
commit 496ff53f22
2 changed files with 22 additions and 5 deletions

View File

@ -32,6 +32,7 @@ IN: json.reader.tests
{ "ß∂¬ƒ˚∆" } [ """ "ß∂¬ƒ˚∆"""" json> ] unit-test { "ß∂¬ƒ˚∆" } [ """ "ß∂¬ƒ˚∆"""" json> ] unit-test
{ 8 9 10 12 13 34 47 92 } >string 1array [ """ "\\b\\t\\n\\f\\r\\"\\/\\\\" """ json> ] unit-test { 8 9 10 12 13 34 47 92 } >string 1array [ """ "\\b\\t\\n\\f\\r\\"\\/\\\\" """ json> ] unit-test
{ 0xabcd } >string 1array [ """ "\\uaBCd" """ json> ] unit-test { 0xabcd } >string 1array [ """ "\\uaBCd" """ json> ] unit-test
{ "𝄞" } [ "\"\\ud834\\udd1e\"" json> ] unit-test
{ H{ { "a" { } } { "b" 123 } } } [ "{\"a\":[],\"b\":123}" json> ] unit-test { H{ { "a" { } } { "b" 123 } } } [ "{\"a\":[],\"b\":123}" json> ] unit-test
{ { } } [ "[]" json> ] unit-test { { } } [ "[]" json> ] unit-test

View File

@ -3,7 +3,8 @@
USING: arrays assocs combinators fry hashtables io USING: arrays assocs combinators fry hashtables io
io.streams.string json kernel kernel.private math math.parser io.streams.string json kernel kernel.private math math.parser
namespaces sbufs sequences sequences.private strings vectors ; namespaces sbufs sequences sequences.private strings vectors
math.order ;
IN: json.reader IN: json.reader
@ -13,8 +14,26 @@ IN: json.reader
[ 1string ] [ "\s\t\r\n,:}]" swap stream-read-until ] bi* [ 1string ] [ "\s\t\r\n,:}]" swap stream-read-until ] bi*
[ append string>number ] dip ; [ append string>number ] dip ;
: json-expect ( token stream -- )
[ dup length ] [ stream-read ] bi* = [ json-error ] unless ; inline
DEFER: (read-json-string) DEFER: (read-json-string)
: decode-utf16-surrogate-pair ( hex1 hex2 -- char )
[ 0x3ff bitand ] bi@ [ 10 shift ] dip bitor 0x10000 + ;
: stream-read-4hex ( stream -- hex ) 4 swap stream-read hex> ;
: first-surrogate? ( hex -- ? ) 0xd800 0xdbff between? ;
: read-second-surrogate ( stream -- hex )
"\\u" over json-expect stream-read-4hex ;
: read-json-escape-unicode ( stream -- char )
[ stream-read-4hex ] keep over first-surrogate? [
read-second-surrogate decode-utf16-surrogate-pair
] [ drop ] if ;
: (read-json-escape) ( stream accum -- accum ) : (read-json-escape) ( stream accum -- accum )
{ sbuf } declare { sbuf } declare
over stream-read1 { over stream-read1 {
@ -26,7 +45,7 @@ DEFER: (read-json-string)
{ CHAR: n [ CHAR: \n ] } { CHAR: n [ CHAR: \n ] }
{ CHAR: r [ CHAR: \r ] } { CHAR: r [ CHAR: \r ] }
{ CHAR: t [ CHAR: \t ] } { CHAR: t [ CHAR: \t ] }
{ CHAR: u [ 4 pick stream-read hex> ] } { CHAR: u [ over read-json-escape-unicode ] }
[ ] [ ]
} case [ suffix! (read-json-string) ] [ json-error ] if* ; } case [ suffix! (read-json-string) ] [ json-error ] if* ;
@ -78,9 +97,6 @@ DEFER: (read-json-string)
: json-close-hash ( accum -- accum ) : json-close-hash ( accum -- accum )
v-close dup dup [ v-pop ] bi@ swap H{ } zip-as suffix! ; v-close dup dup [ v-pop ] bi@ swap H{ } zip-as suffix! ;
: json-expect ( token stream -- )
[ dup length ] [ stream-read ] bi* = [ json-error ] unless ; inline
: scan ( stream accum char -- stream accum ) : scan ( stream accum char -- stream accum )
! 2dup 1string swap . . ! Great for debug... ! 2dup 1string swap . . ! Great for debug...
{ object vector object } declare { object vector object } declare