diff --git a/basis/json/reader/reader-tests.factor b/basis/json/reader/reader-tests.factor index dcd45f1876..f33a127762 100644 --- a/basis/json/reader/reader-tests.factor +++ b/basis/json/reader/reader-tests.factor @@ -32,6 +32,7 @@ IN: json.reader.tests { "ß∂¬ƒ˚∆" } [ """ "ß∂¬ƒ˚∆"""" json> ] unit-test { 8 9 10 12 13 34 47 92 } >string 1array [ """ "\\b\\t\\n\\f\\r\\"\\/\\\\" """ json> ] unit-test { 0xabcd } >string 1array [ """ "\\uaBCd" """ json> ] unit-test +{ "𝄞" } [ "\"\\ud834\\udd1e\"" json> ] unit-test { H{ { "a" { } } { "b" 123 } } } [ "{\"a\":[],\"b\":123}" json> ] unit-test { { } } [ "[]" json> ] unit-test diff --git a/basis/json/reader/reader.factor b/basis/json/reader/reader.factor index d6af2c49d2..91419399e0 100644 --- a/basis/json/reader/reader.factor +++ b/basis/json/reader/reader.factor @@ -3,7 +3,8 @@ USING: arrays assocs combinators fry hashtables io io.streams.string json kernel kernel.private math math.parser -namespaces sbufs sequences sequences.private strings vectors ; +namespaces sbufs sequences sequences.private strings vectors +math.order ; IN: json.reader @@ -13,8 +14,26 @@ IN: json.reader [ 1string ] [ "\s\t\r\n,:}]" swap stream-read-until ] bi* [ append string>number ] dip ; +: json-expect ( token stream -- ) + [ dup length ] [ stream-read ] bi* = [ json-error ] unless ; inline + DEFER: (read-json-string) +: decode-utf16-surrogate-pair ( hex1 hex2 -- char ) + [ 0x3ff bitand ] bi@ [ 10 shift ] dip bitor 0x10000 + ; + +: stream-read-4hex ( stream -- hex ) 4 swap stream-read hex> ; + +: first-surrogate? ( hex -- ? ) 0xd800 0xdbff between? ; + +: read-second-surrogate ( stream -- hex ) + "\\u" over json-expect stream-read-4hex ; + +: read-json-escape-unicode ( stream -- char ) + [ stream-read-4hex ] keep over first-surrogate? [ + read-second-surrogate decode-utf16-surrogate-pair + ] [ drop ] if ; + : (read-json-escape) ( stream accum -- accum ) { sbuf } declare over stream-read1 { @@ -26,7 +45,7 @@ DEFER: (read-json-string) { CHAR: n [ CHAR: \n ] } { CHAR: r [ CHAR: \r ] } { CHAR: t [ CHAR: \t ] } - { CHAR: u [ 4 pick stream-read hex> ] } + { CHAR: u [ over read-json-escape-unicode ] } [ ] } case [ suffix! (read-json-string) ] [ json-error ] if* ; @@ -78,9 +97,6 @@ DEFER: (read-json-string) : json-close-hash ( accum -- accum ) v-close dup dup [ v-pop ] bi@ swap H{ } zip-as suffix! ; -: json-expect ( token stream -- ) - [ dup length ] [ stream-read ] bi* = [ json-error ] unless ; inline - : scan ( stream accum char -- stream accum ) ! 2dup 1string swap . . ! Great for debug... { object vector object } declare