json.reader: read UTF-16 surrogate pairs
parent
59ba0ef341
commit
496ff53f22
|
@ -32,6 +32,7 @@ IN: json.reader.tests
|
||||||
{ "ß∂¬ƒ˚∆" } [ """ "ß∂¬ƒ˚∆"""" json> ] unit-test
|
{ "ß∂¬ƒ˚∆" } [ """ "ß∂¬ƒ˚∆"""" json> ] unit-test
|
||||||
{ 8 9 10 12 13 34 47 92 } >string 1array [ """ "\\b\\t\\n\\f\\r\\"\\/\\\\" """ json> ] unit-test
|
{ 8 9 10 12 13 34 47 92 } >string 1array [ """ "\\b\\t\\n\\f\\r\\"\\/\\\\" """ json> ] unit-test
|
||||||
{ 0xabcd } >string 1array [ """ "\\uaBCd" """ json> ] unit-test
|
{ 0xabcd } >string 1array [ """ "\\uaBCd" """ json> ] unit-test
|
||||||
|
{ "𝄞" } [ "\"\\ud834\\udd1e\"" json> ] unit-test
|
||||||
|
|
||||||
{ H{ { "a" { } } { "b" 123 } } } [ "{\"a\":[],\"b\":123}" json> ] unit-test
|
{ H{ { "a" { } } { "b" 123 } } } [ "{\"a\":[],\"b\":123}" json> ] unit-test
|
||||||
{ { } } [ "[]" json> ] unit-test
|
{ { } } [ "[]" json> ] unit-test
|
||||||
|
|
|
@ -3,7 +3,8 @@
|
||||||
|
|
||||||
USING: arrays assocs combinators fry hashtables io
|
USING: arrays assocs combinators fry hashtables io
|
||||||
io.streams.string json kernel kernel.private math math.parser
|
io.streams.string json kernel kernel.private math math.parser
|
||||||
namespaces sbufs sequences sequences.private strings vectors ;
|
namespaces sbufs sequences sequences.private strings vectors
|
||||||
|
math.order ;
|
||||||
|
|
||||||
IN: json.reader
|
IN: json.reader
|
||||||
|
|
||||||
|
@ -13,8 +14,26 @@ IN: json.reader
|
||||||
[ 1string ] [ "\s\t\r\n,:}]" swap stream-read-until ] bi*
|
[ 1string ] [ "\s\t\r\n,:}]" swap stream-read-until ] bi*
|
||||||
[ append string>number ] dip ;
|
[ append string>number ] dip ;
|
||||||
|
|
||||||
|
: json-expect ( token stream -- )
|
||||||
|
[ dup length ] [ stream-read ] bi* = [ json-error ] unless ; inline
|
||||||
|
|
||||||
DEFER: (read-json-string)
|
DEFER: (read-json-string)
|
||||||
|
|
||||||
|
: decode-utf16-surrogate-pair ( hex1 hex2 -- char )
|
||||||
|
[ 0x3ff bitand ] bi@ [ 10 shift ] dip bitor 0x10000 + ;
|
||||||
|
|
||||||
|
: stream-read-4hex ( stream -- hex ) 4 swap stream-read hex> ;
|
||||||
|
|
||||||
|
: first-surrogate? ( hex -- ? ) 0xd800 0xdbff between? ;
|
||||||
|
|
||||||
|
: read-second-surrogate ( stream -- hex )
|
||||||
|
"\\u" over json-expect stream-read-4hex ;
|
||||||
|
|
||||||
|
: read-json-escape-unicode ( stream -- char )
|
||||||
|
[ stream-read-4hex ] keep over first-surrogate? [
|
||||||
|
read-second-surrogate decode-utf16-surrogate-pair
|
||||||
|
] [ drop ] if ;
|
||||||
|
|
||||||
: (read-json-escape) ( stream accum -- accum )
|
: (read-json-escape) ( stream accum -- accum )
|
||||||
{ sbuf } declare
|
{ sbuf } declare
|
||||||
over stream-read1 {
|
over stream-read1 {
|
||||||
|
@ -26,7 +45,7 @@ DEFER: (read-json-string)
|
||||||
{ CHAR: n [ CHAR: \n ] }
|
{ CHAR: n [ CHAR: \n ] }
|
||||||
{ CHAR: r [ CHAR: \r ] }
|
{ CHAR: r [ CHAR: \r ] }
|
||||||
{ CHAR: t [ CHAR: \t ] }
|
{ CHAR: t [ CHAR: \t ] }
|
||||||
{ CHAR: u [ 4 pick stream-read hex> ] }
|
{ CHAR: u [ over read-json-escape-unicode ] }
|
||||||
[ ]
|
[ ]
|
||||||
} case [ suffix! (read-json-string) ] [ json-error ] if* ;
|
} case [ suffix! (read-json-string) ] [ json-error ] if* ;
|
||||||
|
|
||||||
|
@ -78,9 +97,6 @@ DEFER: (read-json-string)
|
||||||
: json-close-hash ( accum -- accum )
|
: json-close-hash ( accum -- accum )
|
||||||
v-close dup dup [ v-pop ] bi@ swap H{ } zip-as suffix! ;
|
v-close dup dup [ v-pop ] bi@ swap H{ } zip-as suffix! ;
|
||||||
|
|
||||||
: json-expect ( token stream -- )
|
|
||||||
[ dup length ] [ stream-read ] bi* = [ json-error ] unless ; inline
|
|
||||||
|
|
||||||
: scan ( stream accum char -- stream accum )
|
: scan ( stream accum char -- stream accum )
|
||||||
! 2dup 1string swap . . ! Great for debug...
|
! 2dup 1string swap . . ! Great for debug...
|
||||||
{ object vector object } declare
|
{ object vector object } declare
|
||||||
|
|
Loading…
Reference in New Issue