Add a hand-written parser. Improves parse time from 23 seconds to 0.03 seconds when parsing a 123 KB string.

db4
Philipp Winkler 2009-05-29 14:41:24 -07:00
parent 4cead52ba6
commit 7922628abb
3 changed files with 89 additions and 49 deletions

View File

@ -1 +1,3 @@
Chris Double Chris Double
Peter Burns
Philipp Winkler

View File

@ -19,6 +19,8 @@ IN: json.reader.tests
{ 10.25 } [ "1025e-2" json> ] unit-test { 10.25 } [ "1025e-2" json> ] unit-test
{ 0.125 } [ "0.125" json> ] unit-test { 0.125 } [ "0.125" json> ] unit-test
{ -0.125 } [ "-0.125" json> ] unit-test { -0.125 } [ "-0.125" json> ] unit-test
{ -0.00125 } [ "-0.125e-2" json> ] unit-test
{ -012.5 } [ "-0.125e+2" json> ] unit-test
! not widely supported by javascript, but allowed in the grammar, and a nice ! not widely supported by javascript, but allowed in the grammar, and a nice
! feature to get ! feature to get

View File

@ -1,61 +1,97 @@
! Copyright (C) 2008 Peter Burns. ! Copyright (C) 2008 Peter Burns, 2009 Philipp Winkler
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel peg peg.ebnf math.parser math.parser.private strings math USING: arrays assocs combinators io io.streams.string json
math.functions sequences arrays vectors hashtables assocs kernel math math.parser math.parser.private sequences strings ;
prettyprint json ;
IN: json.reader IN: json.reader
<PRIVATE <PRIVATE
! Parse a JSON number whose first character has already been consumed.
! char is that first character; the rest of the token is read from the
! current input stream up to the next whitespace or structural delimiter.
! Outputs the parsed number followed by the delimiter that ended the token
! (f at end of input) so the caller can continue scanning from it.
: value ( char -- num char )
    1string " \t\r\n,:}]" read-until
    [
        append
        ! A token containing a fraction or exponent is a float. Anything else
        ! is parsed directly as a number rather than as a float converted
        ! with >integer, which rounded integers too large for a float to
        ! represent exactly.
        dup [ "eE." index ] any?
        [ string>float ] [ string>number ] if
    ] dip ;
: grammar-list>vector ( seq -- vec ) first2 values swap prefix ; DEFER: j-string
! Grammar for JSON from RFC 4627 : convert-string ( str -- str )
EBNF: (json>) read1
! Translate the escape character just read into the character code it
! stands for; each branch leaves that code on the stack.
{
! 8 is backspace, 12 is form feed.
{ CHAR: b [ 8 ] }
{ CHAR: f [ 12 ] }
{ CHAR: n [ CHAR: \n ] }
{ CHAR: r [ CHAR: \r ] }
{ CHAR: t [ CHAR: \t ] }
! \u: the next four characters are read and parsed as a hex number.
{ CHAR: u [ 4 read hex> ] }
! Any other character is kept unchanged.
[ ]
} case
dup
! A non-f result is appended to the text read so far, then j-string reads
! the remainder of the string and that is appended too.
[ 1string append j-string append ]
! An f result is discarded, leaving only the text read so far.
[ drop ] if ;
ws = (" " | "\r" | "\t" | "\n")* : j-string ( -- str )
"\\\"" read-until CHAR: \" =
[ convert-string ] unless ;
true = "true" => [[ t ]] : second-last ( seq -- second-last )
false = "false" => [[ f ]] [ length 2 - ] keep nth ; inline
null = "null" => [[ json-null ]]
hex = [0-9a-fA-F] : third-last ( seq -- third-last )
char = '\\"' [[ CHAR: " ]] [ length 3 - ] keep nth ; inline
| "\\\\" [[ CHAR: \ ]]
| "\\/" [[ CHAR: / ]]
| "\\b" [[ 8 ]]
| "\\f" [[ 12 ]]
| "\\n" [[ CHAR: \n ]]
| "\\r" [[ CHAR: \r ]]
| "\\t" [[ CHAR: \t ]]
| "\\u" (hex hex hex hex) [[ hex> ]] => [[ second ]]
| [^"\]
string = '"' char*:cs '"' => [[ cs >string ]]
sign = ("-" | "+")? => [[ "-" = "-" "" ? ]] : last2 ( seq -- second-last last )
digits = [0-9]+ => [[ >string ]] [ second-last ] [ last ] bi ; inline
decimal = "." digits => [[ concat ]]
exp = ("e" | "E") sign digits => [[ concat ]]
number = sign digits decimal? exp? => [[ dup concat swap fourth [ string>float ] [ string>number ] if ]]
elements = value ("," value)* => [[ grammar-list>vector ]] : last3 ( seq -- third-last second-last last )
array = "[" elements?:arr "]" => [[ arr >array ]] [ third-last ] [ last2 ] bi ; inline
pair = ws string:key ws ":" value:val => [[ { key val } ]] : v-over-push ( vec -- vec' )
members = pair ("," pair)* => [[ grammar-list>vector ]] dup length 2 >=
object = "{" members?:hash "}" => [[ hash >hashtable ]] [
dup
[ pop ]
[ last ] bi push
] when ;
val = true : v-pick-push ( vec -- vec' )
| false dup length 3 >=
| null [
| string dup
| number [ pop ]
| array [ second-last ] bi push
| object ] when ;
value = ws val:v ws => [[ v ]] : (close-hash) ( accum -- accum' )
dup length 3 >= [ v-over-push ] when
dup dup [ pop ] dip pop swap
zip H{ } assoc-clone-like over push ;
;EBNF : scan ( accum char -- accum )
! The quotation below runs only when char is not f; when* drops an f char
! and leaves the accumulator untouched.
[
{
! Opening quote: read the string with j-string and push it.
{ CHAR: \" [ j-string over push ] }
! Array start: push a fresh vector to collect the elements.
{ CHAR: [ [ V{ } clone over push ] }
! Element separator: v-over-push handles the item on top of the accumulator.
{ CHAR: , [ v-over-push ] }
! Array end: v-over-push for the final item, then pop the top of the
! accumulator, convert it to an array and push that back.
{ CHAR: ] [ v-over-push dup pop >array over push ] }
! Object start: push two fresh vectors; (close-hash) later pops two
! vectors and zips them into a hashtable.
{ CHAR: { [ 2 [ V{ } clone over push ] times ] }
! Key/value separator: v-pick-push handles the item on top of the accumulator.
{ CHAR: : [ v-pick-push ] }
! Object end.
{ CHAR: } [ (close-hash) ] }
! Whitespace is ignored.
{ CHAR: \u000020 [ ] }
{ CHAR: \t [ ] }
{ CHAR: \r [ ] }
{ CHAR: \n [ ] }
! Literals: the first letter selects t, f or json-null; the following
! 3, 4 or 3 characters are read and discarded without being checked.
{ CHAR: t [ 3 read drop t over push ] }
{ CHAR: f [ 4 read drop f over push ] }
{ CHAR: n [ 3 read drop json-null over push ] }
! Anything else is passed to value, which outputs a number and the
! character that ended it. The number is pushed, and that ending
! character, if not f, is scanned in turn because value already consumed it.
[ value [ over push ] dip [ scan ] when* ]
} case
] when* ;
! Parse the JSON text in string and output the resulting object.
! Characters are read one at a time from a string reader and handed to scan
! together with an accumulator vector, until read1 yields f. The result is
! the first element of the accumulator.
: (json-parser>) ( string -- object )
[ V{ } clone [ read1 dup ] [ scan ] while drop first ] with-string-reader ;
PRIVATE> PRIVATE>
: json> ( string -- object ) (json>) ; : json> ( string -- object )
(json-parser>) ;