From 61a3e4e814e4e8522ff0bfa4472ff76cb2f3c549 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sun, 1 Nov 2009 01:26:05 -0500 Subject: [PATCH] new, faster one-pass number parser --- basis/hints/hints.factor | 8 +- core/math/parser/parser-docs.factor | 7 - core/math/parser/parser-tests.factor | 54 +++- core/math/parser/parser.factor | 358 ++++++++++++++++++--------- 4 files changed, 298 insertions(+), 129 deletions(-) diff --git a/basis/hints/hints.factor b/basis/hints/hints.factor index 1ca5bf1bc5..066af9d701 100644 --- a/basis/hints/hints.factor +++ b/basis/hints/hints.factor @@ -3,7 +3,7 @@ USING: accessors arrays assocs byte-arrays byte-vectors classes combinators definitions effects fry generic generic.single generic.standard hashtables io.binary io.streams.string kernel -kernel.private math math.integers.private math.parser math.parser.private +kernel.private math math.integers.private math.parser namespaces parser sbufs sequences splitting splitting.private strings vectors words ; IN: hints @@ -135,10 +135,4 @@ M\ hashtable at* { { fixnum object } { word object } } "specializer" set-word-pr M\ hashtable set-at { { object fixnum object } { object word object } } "specializer" set-word-prop -\ dec>float { string } "specializer" set-word-prop - -\ hex>float { string } "specializer" set-word-prop - -\ string>integer { string fixnum } "specializer" set-word-prop - \ bignum/f { { bignum bignum } { bignum fixnum } { fixnum bignum } { fixnum fixnum } } "specializer" set-word-prop diff --git a/core/math/parser/parser-docs.factor b/core/math/parser/parser-docs.factor index cd0bb47bd5..9317bc4d6c 100644 --- a/core/math/parser/parser-docs.factor +++ b/core/math/parser/parser-docs.factor @@ -111,13 +111,6 @@ HELP: >hex } } ; -HELP: string>float ( str -- n/f ) -{ $values { "str" string } { "n/f" "a real number or " { $link f } } } -{ $description "Primitive for creating a float from a string representation." } -{ $notes "The " { $link string>number } " word is more general." -$nl -"Outputs " { $link f } " if the string does not represent a float." } ; - HELP: float>string { $values { "n" real } { "str" string } } { $description "Primitive for getting a string representation of a float." } diff --git a/core/math/parser/parser-tests.factor b/core/math/parser/parser-tests.factor index 34bca8a34e..e885b23278 100644 --- a/core/math/parser/parser-tests.factor +++ b/core/math/parser/parser-tests.factor @@ -25,6 +25,21 @@ unit-test [ "e" string>number ] unit-test +[ 1/2 ] [ "1/2" string>number ] unit-test +[ -1/2 ] [ "-1/2" string>number ] unit-test +[ 2 ] [ "4/2" string>number ] unit-test +[ f ] [ "1/-2" string>number ] unit-test +[ f ] [ "1/2/3" string>number ] unit-test +[ 1+1/2 ] [ "1+1/2" string>number ] unit-test +[ f ] [ "1-1/2" string>number ] unit-test +[ -1-1/2 ] [ "-1-1/2" string>number ] unit-test +[ f ] [ "-1+1/2" string>number ] unit-test +[ f ] [ "1+2" string>number ] unit-test +[ f ] [ "1+" string>number ] unit-test +[ f ] [ "1-" string>number ] unit-test +[ f ] [ "+1" string>number ] unit-test +[ f ] [ "1+1/2+2" string>number ] unit-test + [ 100000 ] [ "100,000" string>number ] unit-test [ 100000.0 ] [ "100,000.0" string>number ] unit-test @@ -37,25 +52,54 @@ unit-test [ f ] [ "-,2" string>number ] unit-test [ 2.0 ] [ "2." string>number ] unit-test +[ 0.25 ] [ ".25" string>number ] unit-test +[ -2.0 ] [ "-2." string>number ] unit-test +[ -0.25 ] [ "-.25" string>number ] unit-test +[ f ] [ "-." string>number ] unit-test [ 255 ] [ "ff" hex> ] unit-test +[ 100.0 ] [ "1.0e2" string>number ] unit-test +[ 100.0 ] [ "100.0" string>number ] unit-test +[ 100.0 ] [ "100." string>number ] unit-test + +[ HEX: 1.999999999999ap-3 ] [ "0.2" string>number ] unit-test +[ HEX: 1.3333333333333p0 ] [ "1.2" string>number ] unit-test +[ HEX: 1.5555555555555p0 ] [ "1.333,333,333,333,333,333" string>number ] unit-test +[ HEX: 1.aaaaaaaaaaaabp0 ] [ "1.666,666,666,666,666,666" string>number ] unit-test + [ "100.0" ] [ "1.0e2" string>number number>string ] unit-test +[ -100.0 ] [ "-1.0e2" string>number ] unit-test +[ -100.0 ] [ "-100.0" string>number ] unit-test +[ -100.0 ] [ "-100." string>number ] unit-test + [ "-100.0" ] [ "-1.0e2" string>number number>string ] unit-test +[ -100.0 ] [ "-1.e2" string>number ] unit-test + [ "0.01" ] [ "1.0e-2" string>number number>string ] unit-test +[ 0.01 ] [ "1.0e-2" string>number ] unit-test + [ "-0.01" ] [ "-1.0e-2" string>number number>string ] unit-test +[ -0.01 ] [ "-1.0e-2" string>number ] unit-test + +[ "-0.01" ] +[ "-1.e-2" string>number number>string ] +unit-test + +[ -1.0e-12 ] [ "-1.0e-12" string>number ] unit-test + [ t ] [ "-1.0e-12" string>number number>string { "-1.0e-12" "-1.0e-012" } member? ] unit-test @@ -71,7 +115,7 @@ unit-test [ f ] [ "." string>number ] unit-test - + [ f ] [ ".e" string>number ] unit-test @@ -96,6 +140,10 @@ unit-test [ "1e1/2" string>number ] unit-test +[ f ] +[ "1e1.2" string>number ] +unit-test + [ f ] [ "e/2" string>number ] unit-test @@ -122,6 +170,8 @@ unit-test [ -1/0. ] [ "-1/0." string>number ] unit-test +[ -0.5 ] [ "-1/2." string>number ] unit-test + [ "-0.0" ] [ -0.0 number>string ] unit-test [ "-3/4" ] [ -3/4 number>string ] unit-test @@ -139,6 +189,8 @@ unit-test [ 1.0 ] [ "1.0" hex> ] unit-test [ 1.5 ] [ "1.8" hex> ] unit-test +[ 1.875 ] [ "1.e" hex> ] unit-test +[ 1.90625 ] [ "1.e8" hex> ] unit-test [ 1.03125 ] [ "1.08" hex> ] unit-test [ 15.5 ] [ "f.8" hex> ] unit-test [ 15.53125 ] [ "f.88" hex> ] unit-test diff --git a/core/math/parser/parser.factor b/core/math/parser/parser.factor index f04c0104a5..6138642162 100644 --- a/core/math/parser/parser.factor +++ b/core/math/parser/parser.factor @@ -1,16 +1,245 @@ -! Copyright (C) 2004, 2009 Slava Pestov. -! See http://factorcode.org/license.txt for BSD license. -USING: kernel math.private namespaces sequences sequences.private -strings arrays combinators splitting math assocs byte-arrays make ; +! (c)2009 Joe Groff bsd license +USING: accessors combinators kernel math +namespaces sequences sequences.private splitting strings make ; IN: math.parser : digit> ( ch -- n ) - 127 bitand { + { { [ dup CHAR: 9 <= ] [ CHAR: 0 - ] } { [ dup CHAR: a < ] [ CHAR: A 10 - - ] } [ CHAR: a 10 - - ] } cond - dup 0 < [ drop 255 ] [ dup 16 >= [ drop 255 ] when ] if ; inline + dup 0 < [ drop 255 ] when ; inline + + ( str radix -- i number-parse n ) + [ 0 ] 2dip + [ dup length ] dip + number-parse boa + 0 ; inline + +: (next-digit) ( i number-parse n digit-quot end-quot -- number/f ) + [ 2over length>> < ] 2dip + [ [ 2over str>> nth-unsafe >fixnum [ 1 + >fixnum ] 3dip ] prepose ] dip if ; inline + +: require-next-digit ( i number-parse n quot -- number/f ) + [ 3drop f ] (next-digit) ; inline + +: next-digit ( i number-parse n quot -- number/f ) + [ 2nip ] (next-digit) ; inline + +: add-digit ( i number-parse n digit quot -- number/f ) + [ [ dup radix>> ] [ * ] [ + ] tri* ] dip next-digit ; inline + +: digit-in-radix ( number-parse n char -- number-parse n digit ? ) + digit> pick radix>> over > ; inline + +: ?make-ratio ( num denom/f -- ratio/f ) + [ / ] [ drop f ] if* ; inline + +TUPLE: float-parse + { radix read-only } + { point read-only } + { exponent read-only } ; + +: inc-point ( float-parse -- float-parse' ) + [ radix>> ] [ point>> 1 + ] [ exponent>> ] tri float-parse boa ; inline + +: store-exponent ( float-parse n expt -- float-parse' n ) + swap [ [ drop radix>> ] [ drop point>> ] [ nip ] 2tri float-parse boa ] dip ; inline + +: ?store-exponent ( float-parse n expt/f -- float-parse' n/f ) + [ store-exponent ] [ drop f ] if* ; inline + +: ((pow)) ( base x -- base^x ) + iota 1 rot [ nip * ] curry reduce ; inline +: (pow) ( base x -- base^x ) + dup 0 >= [ ((pow)) ] [ [ recip ] [ neg ] bi* ((pow)) ] if ; inline + +: add-mantissa-digit ( float-parse i number-parse n digit quot -- float-parse' number/f ) + [ [ inc-point ] 4dip ] dip add-digit ; inline + +: make-float-dec-exponent ( float-parse n/f -- float/f ) + [ [ radix>> ] [ point>> ] [ exponent>> ] tri - (pow) ] [ swap /f ] bi* ; inline + +: make-float-bin-exponent ( float-parse n/f -- float/f ) + [ drop [ radix>> ] [ point>> ] bi (pow) ] + [ nip swap /f ] + [ drop 2.0 swap exponent>> (pow) * ] 2tri ; inline + +: ?make-float ( float-parse n/f -- float/f ) + { + { [ dup not ] [ 2drop f ] } + { [ over radix>> 10 = ] [ make-float-dec-exponent ] } + [ make-float-bin-exponent ] + } cond ; inline + +: ?neg ( n/f -- -n/f ) + [ neg ] [ f ] if* ; inline + +: ?add-ratio ( m n/f -- m+n/f ) + dup ratio? [ + ] [ 2drop f ] if ; inline + +: @abort ( i number-parse n x -- f ) + 2drop 2drop f ; inline + +: @split ( i number-parse n -- n i number-parse n' ) + -rot 0 ; inline + +: @split-exponent ( i number-parse n -- n i number-parse' n' ) + -rot [ str>> ] [ length>> ] bi 10 number-parse boa 0 ; inline + +: ( i number-parse n -- float-parse i number-parse n ) + [ drop nip radix>> 0 0 float-parse boa ] 3keep ; inline + +DEFER: @exponent-digit +DEFER: @mantissa-digit +DEFER: @denom-digit +DEFER: @num-digit +DEFER: @pos-digit +DEFER: @neg-digit + +: @exponent-digit-or-punc ( float-parse i number-parse n char -- float-parse number/f ) + { + { CHAR: , [ [ @exponent-digit ] require-next-digit ] } + [ @exponent-digit ] + } case ; inline recursive + +: @exponent-digit ( float-parse i number-parse n char -- float-parse number/f ) + digit-in-radix [ [ @exponent-digit-or-punc ] add-digit ] [ @abort ] if ; inline recursive + +: @exponent-first-char ( float-parse i number-parse n char -- float-parse number/f ) + { + { CHAR: - [ [ @exponent-digit ] require-next-digit ?neg ] } + [ @exponent-digit ] + } case ; inline recursive + +: ->exponent ( float-parse i number-parse n -- float-parse' number/f ) + @split-exponent [ @exponent-first-char ] require-next-digit ?store-exponent ; inline + +: exponent-char? ( number-parse n char -- number-parse n char ? ) + 3dup nip swap radix>> { + { 10 [ [ CHAR: e CHAR: E ] dip [ = ] curry either? ] } + [ drop [ CHAR: p CHAR: P ] dip [ = ] curry either? ] + } case ; inline + +: or-exponent ( i number-parse n char quot -- number/f ) + ! call ; inline + [ exponent-char? [ drop ->exponent ?make-float ] ] dip if ; inline +: or-mantissa->exponent ( float-parse i number-parse n char quot -- float-parse number/f ) + ! call ; inline + [ exponent-char? [ drop ->exponent ] ] dip if ; inline + +: @mantissa-digit-or-punc ( float-parse i number-parse n char -- float-parse number/f ) + { + { CHAR: , [ [ @mantissa-digit ] require-next-digit ] } + [ @mantissa-digit ] + } case ; inline recursive + +: @mantissa-digit ( float-parse i number-parse n char -- float-parse number/f ) + [ + digit-in-radix + [ [ @mantissa-digit-or-punc ] add-mantissa-digit ] + [ @abort ] if + ] or-mantissa->exponent ; inline recursive + +: ->mantissa ( i number-parse n -- number/f ) + [ @mantissa-digit ] next-digit ?make-float ; inline + +: ->required-mantissa ( i number-parse n -- number/f ) + [ @mantissa-digit ] require-next-digit ?make-float ; inline + +: @denom-digit-or-punc ( i number-parse n char -- number/f ) + { + { CHAR: , [ [ @denom-digit ] require-next-digit ] } + { CHAR: . [ ->mantissa ] } + [ [ @denom-digit ] or-exponent ] + } case ; inline recursive + +: @denom-digit ( i number-parse n char -- number/f ) + digit-in-radix [ [ @denom-digit-or-punc ] add-digit ] [ @abort ] if ; inline recursive + +: @denom-first-digit ( i number-parse n char -- number/f ) + { + { CHAR: . [ ->mantissa ] } + [ @denom-digit ] + } case ; inline recursive + +: ->denominator ( i number-parse n -- number/f ) + @split [ @denom-first-digit ] require-next-digit ?make-ratio ; inline + +: @num-digit-or-punc ( i number-parse n char -- number/f ) + { + { CHAR: , [ [ @num-digit ] require-next-digit ] } + { CHAR: / [ ->denominator ] } + [ @num-digit ] + } case ; inline recursive + +: @num-digit ( i number-parse n char -- number/f ) + digit-in-radix [ [ @num-digit-or-punc ] add-digit ] [ @abort ] if ; inline recursive + +: ->numerator ( i number-parse n -- number/f ) + @split [ @num-digit ] require-next-digit ?add-ratio ; inline + +: @pos-digit-or-punc ( i number-parse n char -- number/f ) + { + { CHAR: , [ [ @pos-digit ] require-next-digit ] } + { CHAR: + [ ->numerator ] } + { CHAR: / [ ->denominator ] } + { CHAR: . [ ->mantissa ] } + [ [ @pos-digit ] or-exponent ] + } case ; inline recursive + +: @pos-digit ( i number-parse n char -- number/f ) + digit-in-radix [ [ @pos-digit-or-punc ] add-digit ] [ @abort ] if ; inline recursive + +: @pos-first-digit ( i number-parse n char -- number/f ) + { + { CHAR: . [ ->required-mantissa ] } + [ @pos-digit ] + } case ; inline recursive + +: @neg-digit-or-punc ( i number-parse n char -- number/f ) + { + { CHAR: , [ [ @neg-digit ] require-next-digit ] } + { CHAR: - [ ->numerator ] } + { CHAR: / [ ->denominator ] } + { CHAR: . [ ->mantissa ] } + [ [ @neg-digit ] or-exponent ] + } case ; inline recursive + +: @neg-digit ( i number-parse n char -- number/f ) + digit-in-radix [ [ @neg-digit-or-punc ] add-digit ] [ @abort ] if ; inline recursive + +: @neg-first-digit ( i number-parse n char -- number/f ) + { + { CHAR: . [ ->required-mantissa ] } + [ @neg-digit ] + } case ; inline recursive + +: @first-char ( i number-parse n char -- number/f ) + { + { CHAR: - [ [ @neg-first-digit ] require-next-digit ?neg ] } + [ @pos-first-digit ] + } case ; inline recursive + +PRIVATE> + +: base> ( str radix -- number/f ) + [ @first-char ] require-next-digit ; + +: string>number ( str -- number/f ) 10 base> ; inline + +: bin> ( str -- number/f ) 2 base> ; inline +: oct> ( str -- number/f ) 8 base> ; inline +: dec> ( str -- number/f ) 10 base> ; inline +: hex> ( str -- number/f ) 16 base> ; inline : string>digits ( str -- digits ) [ digit> ] B{ } map-as ; inline @@ -24,114 +253,6 @@ IN: math.parser : digits>integer ( seq radix -- n/f ) [ (digits>integer) ] each-digit ; inline -DEFER: base> - -natural ( seq radix -- n/f ) - over empty? [ 2drop f ] [ - [ over CHAR: , eq? [ 2drop ] [ [ digit> ] dip (digits>integer) ] if ] each-digit - ] if ; - -: sign ( -- str ) negative? get "-" "+" ? ; - -: with-radix ( radix quot -- ) - radix swap with-variable ; inline - -: (base>) ( str -- n ) radix get base> ; - -: whole-part ( str -- m n ) - sign split1 [ (base>) ] dip - dup [ (base>) ] [ drop 0 swap ] if ; - -: string>ratio ( str radix -- a/b ) - [ - "-" ?head dup negative? set swap - "/" split1 (base>) [ whole-part ] dip - 3dup and and [ / + swap [ neg ] when ] [ 2drop 2drop f ] if - ] with-radix ; - -: string>integer ( str radix -- n/f ) - over first-unsafe CHAR: - = [ - [ rest-slice ] dip string>natural dup [ neg ] when - ] [ - string>natural - ] if ; inline - -: dec>float ( str -- n/f ) - [ CHAR: , eq? not ] BV{ } filter-as - 0 over push B{ } like (string>float) ; - -: hex>float-parts ( str -- neg? mantissa-str expt ) - "-" ?head swap "p" split1 [ 10 base> ] [ 0 ] if* ; inline - -: make-mantissa ( str -- bits ) - 16 base> dup log2 52 swap - shift ; inline - -: combine-hex-float-parts ( neg? mantissa expt -- float ) - dup 2046 > [ 2drop -1/0. 1/0. ? ] [ - dup 0 <= [ 1 - shift 0 ] when - [ HEX: 8000,0000,0000,0000 0 ? ] - [ 52 2^ 1 - bitand ] - [ 52 shift ] tri* bitor bitor - bits>double - ] if ; inline - -: hex>float ( str -- n/f ) - hex>float-parts - [ "." split1 [ append make-mantissa ] [ drop 16 base> log2 ] 2bi ] - [ + 1023 + ] bi* - combine-hex-float-parts ; - -: base>float ( str base -- n/f ) - { - { 16 [ hex>float ] } - [ drop dec>float ] - } case ; inline - -: number-char? ( char -- ? ) - "0123456789ABCDEFabcdef." member? ; inline - -: last-unsafe ( seq -- elt ) - [ length 1 - ] [ nth-unsafe ] bi ; inline - -: numeric-looking? ( str -- ? ) - dup empty? [ drop f ] [ - dup first-unsafe number-char? [ - last-unsafe number-char? - ] [ - dup first-unsafe CHAR: - eq? [ - dup length 1 eq? [ drop f ] [ - 1 over nth-unsafe number-char? [ - last-unsafe number-char? - ] [ drop f ] if - ] if - ] [ drop f ] if - ] if - ] if ; inline - -PRIVATE> - -: string>float ( str -- n/f ) - 10 base>float ; inline - -: base> ( str radix -- n/f ) - over numeric-looking? [ - over [ "/." member? ] find nip { - { CHAR: / [ string>ratio ] } - { CHAR: . [ base>float ] } - [ drop string>integer ] - } case - ] [ 2drop f ] if ; - -: string>number ( str -- n/f ) 10 base> ; inline -: bin> ( str -- n/f ) 2 base> ; inline -: oct> ( str -- n/f ) 8 base> ; inline -: hex> ( str -- n/f ) 16 base> ; inline - : >digit ( n -- ch ) dup 10 < [ CHAR: 0 + ] [ 10 - CHAR: a + ] if ; inline @@ -144,6 +265,14 @@ GENERIC# >base 1 ( n radix -- str ) base) ( n -- str ) radix get positive>base ; PRIVATE> @@ -244,3 +373,4 @@ M: float >base : >hex ( n -- str ) 16 >base ; inline : # ( n -- ) number>string % ; inline +