From 7922628abbd5f6ba96a2922be7854299c89e85d1 Mon Sep 17 00:00:00 2001 From: Philipp Winkler Date: Fri, 29 May 2009 14:41:24 -0700 Subject: [PATCH] Add a hand parser. Improves speed from 23 seconds to 0.03 seconds when parsing a 123Kb string. --- basis/json/reader/authors.txt | 2 + basis/json/reader/reader-tests.factor | 2 + basis/json/reader/reader.factor | 134 ++++++++++++++++---------- 3 files changed, 89 insertions(+), 49 deletions(-) diff --git a/basis/json/reader/authors.txt b/basis/json/reader/authors.txt index 44b06f94bc..d269b4ffb5 100644 --- a/basis/json/reader/authors.txt +++ b/basis/json/reader/authors.txt @@ -1 +1,3 @@ Chris Double +Peter Burns +Philipp Winkler diff --git a/basis/json/reader/reader-tests.factor b/basis/json/reader/reader-tests.factor index e97d45babe..fa6c8d7d3a 100644 --- a/basis/json/reader/reader-tests.factor +++ b/basis/json/reader/reader-tests.factor @@ -19,6 +19,8 @@ IN: json.reader.tests { 10.25 } [ "1025e-2" json> ] unit-test { 0.125 } [ "0.125" json> ] unit-test { -0.125 } [ "-0.125" json> ] unit-test +{ -0.00125 } [ "-0.125e-2" json> ] unit-test +{ -012.5 } [ "-0.125e+2" json> ] unit-test ! not widely supported by javascript, but allowed in the grammar, and a nice ! feature to get diff --git a/basis/json/reader/reader.factor b/basis/json/reader/reader.factor index 887a7a50e5..1544af55cf 100644 --- a/basis/json/reader/reader.factor +++ b/basis/json/reader/reader.factor @@ -1,61 +1,97 @@ -! Copyright (C) 2008 Peter Burns. +! Copyright (C) 2008 Peter Burns, 2009 Philipp Winkler ! See http://factorcode.org/license.txt for BSD license. -USING: kernel peg peg.ebnf math.parser math.parser.private strings math -math.functions sequences arrays vectors hashtables assocs -prettyprint json ; +USING: arrays assocs combinators io io.streams.string json +kernel math math.parser math.parser.private sequences strings ; IN: json.reader float ] + [ [ "eE." index ] any? [ >integer ] unless ] bi + ] dip ; -: grammar-list>vector ( seq -- vec ) first2 values swap prefix ; +DEFER: j-string + +: convert-string ( str -- str ) + read1 + { + { CHAR: b [ 8 ] } + { CHAR: f [ 12 ] } + { CHAR: n [ CHAR: \n ] } + { CHAR: r [ CHAR: \r ] } + { CHAR: t [ CHAR: \t ] } + { CHAR: u [ 4 read hex> ] } + [ ] + } case + dup + [ 1string append j-string append ] + [ drop ] if ; + +: j-string ( -- str ) + "\\\"" read-until CHAR: \" = + [ convert-string ] unless ; + +: second-last ( seq -- second-last ) + [ length 2 - ] keep nth ; inline -! Grammar for JSON from RFC 4627 -EBNF: (json>) +: third-last ( seq -- third-last ) + [ length 3 - ] keep nth ; inline + +: last2 ( seq -- second-last last ) + [ second-last ] [ last ] bi ; inline -ws = (" " | "\r" | "\t" | "\n")* +: last3 ( seq -- third-last second-last last ) + [ third-last ] [ last2 ] bi ; inline -true = "true" => [[ t ]] -false = "false" => [[ f ]] -null = "null" => [[ json-null ]] +: v-over-push ( vec -- vec' ) + dup length 2 >= + [ + dup + [ pop ] + [ last ] bi push + ] when ; -hex = [0-9a-fA-F] -char = '\\"' [[ CHAR: " ]] - | "\\\\" [[ CHAR: \ ]] - | "\\/" [[ CHAR: / ]] - | "\\b" [[ 8 ]] - | "\\f" [[ 12 ]] - | "\\n" [[ CHAR: \n ]] - | "\\r" [[ CHAR: \r ]] - | "\\t" [[ CHAR: \t ]] - | "\\u" (hex hex hex hex) [[ hex> ]] => [[ second ]] - | [^"\] -string = '"' char*:cs '"' => [[ cs >string ]] +: v-pick-push ( vec -- vec' ) + dup length 3 >= + [ + dup + [ pop ] + [ second-last ] bi push + ] when ; -sign = ("-" | "+")? => [[ "-" = "-" "" ? ]] -digits = [0-9]+ => [[ >string ]] -decimal = "." digits => [[ concat ]] -exp = ("e" | "E") sign digits => [[ concat ]] -number = sign digits decimal? exp? => [[ dup concat swap fourth [ string>float ] [ string>number ] if ]] - -elements = value ("," value)* => [[ grammar-list>vector ]] -array = "[" elements?:arr "]" => [[ arr >array ]] - -pair = ws string:key ws ":" value:val => [[ { key val } ]] -members = pair ("," pair)* => [[ grammar-list>vector ]] -object = "{" members?:hash "}" => [[ hash >hashtable ]] - -val = true - | false - | null - | string - | number - | array - | object - -value = ws val:v ws => [[ v ]] - -;EBNF +: (close-hash) ( accum -- accum' ) + dup length 3 >= [ v-over-push ] when + dup dup [ pop ] dip pop swap + zip H{ } assoc-clone-like over push ; + +: scan ( accum char -- accum ) + [ + { + { CHAR: \" [ j-string over push ] } + { CHAR: [ [ V{ } clone over push ] } + { CHAR: , [ v-over-push ] } + { CHAR: ] [ v-over-push dup pop >array over push ] } + { CHAR: { [ 2 [ V{ } clone over push ] times ] } + { CHAR: : [ v-pick-push ] } + { CHAR: } [ (close-hash) ] } + { CHAR: \u000020 [ ] } + { CHAR: \t [ ] } + { CHAR: \r [ ] } + { CHAR: \n [ ] } + { CHAR: t [ 3 read drop t over push ] } + { CHAR: f [ 4 read drop f over push ] } + { CHAR: n [ 3 read drop json-null over push ] } + [ value [ over push ] dip [ scan ] when* ] + } case + ] when* ; +: (json-parser>) ( string -- object ) + [ V{ } clone [ read1 dup ] [ scan ] while drop first ] with-string-reader ; + PRIVATE> - -: json> ( string -- object ) (json>) ; \ No newline at end of file + +: json> ( string -- object ) + (json-parser>) ; \ No newline at end of file