From 5da056642665f5bb3f5ab3901333164a6504b1ca Mon Sep 17 00:00:00 2001 From: Sascha Matzke Date: Fri, 27 Mar 2009 16:33:49 +0100 Subject: [PATCH] performance improvements --- bson/reader/reader.factor | 35 +++++++++---------------- bson/writer/writer.factor | 2 +- mongodb/benchmark/benchmark.factor | 42 +++++++++++++++++++++++------- mongodb/driver/driver.factor | 2 +- 4 files changed, 48 insertions(+), 33 deletions(-) diff --git a/bson/reader/reader.factor b/bson/reader/reader.factor index f39d4a21d6..7e81fd5e25 100644 --- a/bson/reader/reader.factor +++ b/bson/reader/reader.factor @@ -1,6 +1,6 @@ USING: accessors assocs bson.constants byte-arrays byte-vectors fry io io.binary io.encodings.string io.encodings.utf8 kernel math namespaces -sequences serialize arrays calendar ; +sequences serialize arrays calendar io.encodings ; IN: bson.reader @@ -41,6 +41,9 @@ GENERIC: element-read ( type -- cont? ) GENERIC: element-data-read ( type -- object ) GENERIC: element-binary-read ( length type -- object ) +: byte-arrary>number ( seq -- number ) + byte-array>bignum >integer ; inline + : get-state ( -- state ) state get ; inline @@ -48,13 +51,13 @@ GENERIC: element-binary-read ( length type -- object ) [ get-state ] dip '[ _ + ] change-read drop ; inline : read-int32 ( -- int32 ) - 4 [ read le> ] [ count-bytes ] bi ; inline + 4 [ read byte-array>number ] [ count-bytes ] bi ; inline : read-longlong ( -- longlong ) - 8 [ read le> ] [ count-bytes ] bi ; inline + 8 [ read byte-array>number ] [ count-bytes ] bi ; inline : read-double ( -- double ) - 8 [ read le> bits>double ] [ count-bytes ] bi ; inline + 8 [ read byte-array>number bits>double ] [ count-bytes ] bi ; inline : read-byte-raw ( -- byte-raw ) 1 [ read ] [ count-bytes ] bi ; inline @@ -62,21 +65,12 @@ GENERIC: element-binary-read ( length type -- object ) : read-byte ( -- byte ) read-byte-raw first ; inline -: (read-cstring) ( acc -- ) - [ read-byte-raw first ] dip ! b acc - 2dup push ! b acc - [ 0 = ] dip ! bool acc - '[ _ (read-cstring) ] unless ; inline recursive - : read-cstring ( -- string ) - BV{ } clone - [ (read-cstring) ] keep - [ zero? ] trim-tail - >byte-array utf8 decode ; inline + input-stream get utf8 + "\0" swap stream-read-until drop ; inline : read-sized-string ( length -- string ) - [ read ] [ count-bytes ] bi - [ zero? ] trim-tail utf8 decode ; inline + drop read-cstring ; inline : read-element-type ( -- type ) read-byte ; inline @@ -128,14 +122,11 @@ M: bson-eoo element-read ( type -- cont? ) M: bson-not-eoo element-read ( type -- cont? ) [ peek-scope ] dip ! scope type - '[ _ - read-cstring push-element [ name>> ] [ type>> ] bi + '[ _ read-cstring push-element [ name>> ] [ type>> ] bi [ element-data-read ] keep end-element swap - ] dip - set-at - t ; + ] dip set-at t ; : [scope-changer] ( state -- state quot ) dup exemplar>> '[ [ [ _ clone ] dip push ] keep ] ; inline @@ -212,4 +203,4 @@ PRIVATE> : stream>assoc ( exemplar -- assoc bytes-read ) dup state [ read-int32 >>size read-elements ] with-variable - [ result>> ] [ read>> ] bi ; + [ result>> ] [ read>> ] bi ; inline diff --git a/bson/writer/writer.factor b/bson/writer/writer.factor index 6684888ad0..4c94840888 100644 --- a/bson/writer/writer.factor +++ b/bson/writer/writer.factor @@ -82,7 +82,7 @@ M: quotation bson-type? ( quotation -- type ) drop T_Binary ; M: byte-array bson-type? ( byte-array -- type ) drop T_Binary ; : write-utf8-string ( string -- ) - output-stream get '[ _ swap char>utf8 ] each ; inline + output-stream get utf8 stream-write ; inline : write-byte ( byte -- ) CHAR-SIZE >le-stream ; inline : write-int32 ( int -- ) INT32-SIZE >le-stream ; inline diff --git a/mongodb/benchmark/benchmark.factor b/mongodb/benchmark/benchmark.factor index effac96b2c..424aa7732c 100644 --- a/mongodb/benchmark/benchmark.factor +++ b/mongodb/benchmark/benchmark.factor @@ -1,5 +1,5 @@ -USING: calendar math fry kernel assocs math.ranges -sequences formatting combinators namespaces io tools.time prettyprint +USING: calendar math fry kernel assocs math.ranges bson.reader io.streams.byte-array +sequences formatting combinators namespaces io tools.time prettyprint io.encodings.binary accessors words mongodb.driver strings math.parser tools.walker bson.writer ; IN: mongodb.benchmark @@ -13,7 +13,7 @@ SYMBOL: collection dup string? [ string>number ] when ; inline : trial-size ( -- size ) - "per-trial" 10000 get* ensure-number ; inline flushable + "per-trial" 5000 get* ensure-number ; inline flushable : batch-size ( -- size ) "batch-size" 100 get* ensure-number ; inline flushable @@ -169,6 +169,13 @@ CONSTANT: DOC-LARGE H{ { "base_url" "http://www.example.com/test-me" } result get batch>> [ '[ _ _ (insert-batch) ] ] [ '[ _ _ (insert) ] ] if ; +: serialize ( doc-quot: ( i -- doc ) -- quot: ( -- ) ) + '[ trial-size [ _ call assoc>bv drop ] each-integer ] ; inline + +: deserialize ( doc-quot: ( i -- doc ) -- quot: ( -- ) ) + [ 0 ] dip call assoc>bv + '[ trial-size [ _ binary [ H{ } stream>assoc 2drop ] with-byte-reader ] times ] ; inline + : check-for-key ( assoc key -- ) CHECK-KEY [ swap key? [ "ups... where's the key" throw ] unless ] [ 2drop ] if ; inline @@ -213,14 +220,14 @@ CONSTANT: DOC-LARGE H{ { "base_url" "http://www.example.com/test-me" } lasterror>> bchar trial-size ] dip 1000000 / /i - "%-18s: {batch:%s,index:%s;errchk:%s} %10s op/s" + "%-18s: {batch:%s,index:%s;errchk:%s} %10s docs/s" sprintf print flush ; inline : print-separator ( -- ) - "--------------------------------------------------------------" print flush ; inline + "----------------------------------------------------------------" print flush ; inline : print-separator-bold ( -- ) - "==============================================================" print flush ; inline + "================================================================" print flush ; inline : print-header ( -- ) trial-size @@ -238,7 +245,17 @@ CONSTANT: DOC-LARGE H{ { "base_url" "http://www.example.com/test-me" } '[ [ [ _ execute ] dip [ execute ] each _ execute benchmark ] with-result ] each print-separator ] ; inline - + +: run-serialization-bench ( doc-word-seq feat-seq -- ) + "Serialization Tests" print + print-separator-bold + \ serialize bench-quot each ; inline + +: run-deserialization-bench ( doc-word-seq feat-seq -- ) + "Deserialization Tests" print + print-separator-bold + \ deserialize bench-quot each ; inline + : run-insert-bench ( doc-word-seq feat-seq -- ) "Insert Tests" print print-separator-bold @@ -262,8 +279,15 @@ CONSTANT: DOC-LARGE H{ { "base_url" "http://www.example.com/test-me" } : run-benchmarks ( -- ) "db" "db" get* "host" "127.0.0.1" get* "port" 27020 get* ensure-number - [ ensure-buffer - print-header + [ print-header + ! serialization + { small-doc-prepare medium-doc-prepare + large-doc-prepare } + { { } } run-serialization-bench + ! deserialization + { small-doc-prepare medium-doc-prepare + large-doc-prepare } + { { } } run-deserialization-bench ! insert { small-doc-prepare medium-doc-prepare large-doc-prepare } diff --git a/mongodb/driver/driver.factor b/mongodb/driver/driver.factor index 7e94f6d035..430f94f0cd 100644 --- a/mongodb/driver/driver.factor +++ b/mongodb/driver/driver.factor @@ -70,7 +70,7 @@ SYNTAX: r/ ( token -- mdbregexp ) : with-db ( mdb quot -- ... ) [ [ prepare-mdb-session ] dip - [ [ >>mdb-stream ] keep ] prepose + [ >>mdb-stream ] prepose with-disposal ] with-scope ; inline