factor/core/math/parser/parser.factor

593 lines
18 KiB
Factor

! Copyright (C) 2009 Joe Groff, 2013 John Benediktsson
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays combinators kernel kernel.private layouts
make math math.private sbufs sequences sequences.private strings ;
IN: math.parser
<PRIVATE
PRIMITIVE: (format-float) ( n fill width precision format locale -- byte-array )
PRIVATE>
: digit> ( ch -- n )
{
{ [ dup CHAR: 9 <= ] [ CHAR: 0 - dup 0 < [ drop 255 ] when ] }
{ [ dup CHAR: a < ] [ CHAR: A 10 - - dup 10 < [ drop 255 ] when ] }
[ CHAR: a 10 - - dup 10 < [ drop 255 ] when ]
} cond ; inline
: string>digits ( str -- digits )
[ digit> ] B{ } map-as ; inline
: >digit ( n -- ch )
dup 10 < [ CHAR: 0 + ] [ 10 - CHAR: a + ] if ; inline
ERROR: invalid-radix radix ;
<PRIVATE
! magnitude is used only for floats to avoid
! expensive computations when we know that
! the result will overflow/underflow.
! The computation of magnitude starts in
! number-parse and continues in float-parse.
TUPLE: number-parse
{ str read-only }
{ length fixnum read-only }
{ radix fixnum }
{ magnitude fixnum } ;
: <number-parse> ( str radix -- i number-parse n )
[ 0 ] 2dip [ dup length ] dip 0 number-parse boa 0 ; inline
: (next-digit) ( i number-parse n digit-quot end-quot -- n/f )
[ 2over length>> < ] 2dip
[ [ 2over str>> nth-unsafe >fixnum [ 1 fixnum+fast ] 3dip ] prepose ] dip if ; inline
: require-next-digit ( i number-parse n quot -- n/f )
[ 3drop f ] (next-digit) ; inline
: next-digit ( i number-parse n quot -- n/f )
[ 2nip ] (next-digit) ; inline
: inc-magnitude ( number-parse -- number-parse' )
[ 1 fixnum+fast ] change-magnitude ; inline
: ?inc-magnitude ( number-parse n -- number-parse' )
zero? [ inc-magnitude ] unless ; inline
: (add-digit) ( number-parse n digit -- number-parse n' )
[ dup radix>> ] [ * ] [ + ] tri* ; inline
: add-digit ( i number-parse n digit quot -- n/f )
[ (add-digit) [ ?inc-magnitude ] keep ] dip next-digit ; inline
: add-exponent-digit ( i number-parse n digit quot -- n/f )
[ (add-digit) ] dip next-digit ; inline
: digit-in-radix ( number-parse n char -- number-parse n digit ? )
digit> pick radix>> over > ; inline
: ?make-ratio ( num denom/f -- ratio/f )
! don't use number= to allow 0. for "1/0."
[ dup 0 = [ 2drop f ] [ / ] if ] [ drop f ] if* ; inline
TUPLE: float-parse
{ radix fixnum }
{ point fixnum }
{ exponent }
{ magnitude } ;
: inc-point-?dec-magnitude ( float-parse n -- float-parse' )
zero? [ [ 1 fixnum-fast ] change-magnitude ] when
[ 1 fixnum+fast ] change-point ; inline
: store-exponent ( float-parse n expt -- float-parse' n )
swap [ >>exponent ] dip ; inline
: ?store-exponent ( float-parse n expt/f -- float-parse' n/f )
[ store-exponent ] [ drop f ] if* ; inline
: pow-until ( base x -- base^x )
[ 1 ] 2dip
[ dup zero? ] [
dup odd? [ [ [ * ] keep ] [ 1 - ] bi* ] when
[ sq ] [ 2/ ] bi*
] until 2drop ; inline
: (pow) ( base x -- base^x )
integer>fixnum-strict
dup 0 >= [ pow-until ] [ [ recip ] [ neg ] bi* pow-until ] if ; inline
: add-mantissa-digit ( float-parse i number-parse n digit quot -- float-parse' n/f )
[ (add-digit)
dup [ inc-point-?dec-magnitude ] curry 3dip
] dip next-digit ; inline
! IEE754 doubles are in the range ]10^309,10^-324[,
! or expressed in base 2, ]2^1024, 2^-1074].
! We don't need those ranges to be accurate as long as we are
! excluding all the floats because they are used only to
! optimize when we know there will be an overflow/underflow
! We compare these numbers to the magnitude slot of float-parse,
! which has the following behavior:
! ... ; 0.0xxx -> -1; 0.xxx -> 0; x.xxx -> 1; xx.xxx -> 2; ...;
! Also, take some margin as the current float parsing algorithm
! does some rounding; For example,
! 0x1.0p-1074 is the smallest IE754 double, but floats down to
! 0x0.8p-1074 (excluded) are parsed as 0x1.0p-1074
CONSTANT: max-magnitude-10 309
CONSTANT: min-magnitude-10 -323
CONSTANT: max-magnitude-2 1027
CONSTANT: min-magnitude-2 -1074
: make-float-dec-exponent ( float-parse n/f -- float/f )
over [ exponent>> ] [ magnitude>> ] bi +
{
{ [ dup max-magnitude-10 > ] [ 3drop 1/0. ] }
{ [ dup min-magnitude-10 < ] [ 3drop 0.0 ] }
[ drop
[ [ radix>> ] [ point>> ] [ exponent>> ] tri - (pow) ]
[ swap /f ] bi*
]
} cond ; inline
: base2-digits ( digits radix -- digits' )
{
{ 16 [ 4 * ] }
{ 8 [ 3 * ] }
{ 2 [ ] }
} case ; inline
: base2-point ( float-parse -- point )
[ point>> ] [ radix>> ] bi base2-digits ; inline
: base2-magnitude ( float-parse -- point )
[ magnitude>> ] [ radix>> ] bi base2-digits ; inline
: make-float-bin-exponent ( float-parse n/f -- float/f )
over [ exponent>> ] [ base2-magnitude ] bi +
{
{ [ dup max-magnitude-2 > ] [ 3drop 1/0. ] }
{ [ dup min-magnitude-2 < ] [ 3drop 0.0 ] }
[ drop
[ [ drop 2 ] [ base2-point ] [ exponent>> ] tri - (pow) ]
[ swap /f ] bi*
]
} cond ; inline
: ?default-exponent ( float-parse n/f -- float-parse' n/f' )
over exponent>> [
over radix>> 10 = [ 0 store-exponent ] [ drop f ] if
] unless ; inline
: ?make-float ( float-parse n/f -- float/f )
{ float-parse object } declare
?default-exponent
{
{ [ dup not ] [ 2drop f ] }
{ [ over radix>> 10 = ] [ make-float-dec-exponent ] }
[ make-float-bin-exponent ]
} cond ;
: ?neg ( n/f -- -n/f )
[
dup bignum? [
dup first-bignum bignum=
[ drop most-negative-fixnum ] [ neg ] if
] [ neg ] if
] [ f ] if* ; inline
: ?add-ratio ( m n/f -- m+n/f )
dup ratio? [ + ] [ 2drop f ] if ; inline
: @abort ( i number-parse n x -- f )
4drop f ; inline
: @split ( i number-parse n -- n i number-parse' n' )
-rot 0 >>magnitude 0 ; inline
: @split-exponent ( i number-parse n -- n i number-parse' n' )
-rot 10 >>radix 0 ; inline
: <float-parse> ( i number-parse n -- float-parse i number-parse n )
[ drop nip [ radix>> ] [ magnitude>> ] bi [ 0 f ] dip float-parse boa ] 3keep ; inline
DEFER: @exponent-digit
DEFER: @mantissa-digit
DEFER: @denom-digit
DEFER: @num-digit
DEFER: @pos-digit
DEFER: @neg-digit
: @exponent-digit-or-punc ( float-parse i number-parse n char -- float-parse n/f )
{
{ CHAR: , [ [ @exponent-digit ] require-next-digit ] }
[ @exponent-digit ]
} case ; inline
: @exponent-digit ( float-parse i number-parse n char -- float-parse n/f )
{ float-parse fixnum number-parse integer fixnum } declare
digit-in-radix [ [ @exponent-digit-or-punc ] add-exponent-digit ] [ @abort ] if ;
: @exponent-first-char ( float-parse i number-parse n char -- float-parse n/f )
{
{ CHAR: - [ [ @exponent-digit ] require-next-digit ?neg ] }
{ CHAR: + [ [ @exponent-digit ] require-next-digit ] }
[ @exponent-digit ]
} case ; inline
: ->exponent ( float-parse i number-parse n -- float-parse' n/f )
@split-exponent [ @exponent-first-char ] require-next-digit ?store-exponent ; inline
: exponent-char? ( number-parse n char -- number-parse n char ? )
pick radix>> {
{ 10 [ dup "eE" member-eq? ] }
[ drop dup "pP" member-eq? ]
} case ; inline
: or-exponent ( i number-parse n char quot -- n/f )
[ exponent-char? [ drop <float-parse> ->exponent ?make-float ] ] dip if ; inline
: or-mantissa->exponent ( float-parse i number-parse n char quot -- float-parse n/f )
[ exponent-char? [ drop ->exponent ] ] dip if ; inline
: @mantissa-digit-or-punc ( float-parse i number-parse n char -- float-parse n/f )
{
{ CHAR: , [ [ @mantissa-digit ] require-next-digit ] }
[ @mantissa-digit ]
} case ; inline
: @mantissa-digit ( float-parse i number-parse n char -- float-parse n/f )
{ float-parse fixnum number-parse integer fixnum } declare
[
digit-in-radix
[ [ @mantissa-digit-or-punc ] add-mantissa-digit ]
[ @abort ] if
] or-mantissa->exponent ;
: ->mantissa ( i number-parse n -- n/f )
<float-parse> [ @mantissa-digit ] next-digit ?make-float ; inline
: ->required-mantissa ( i number-parse n -- n/f )
<float-parse> [ @mantissa-digit ] require-next-digit ?make-float ; inline
: @denom-digit-or-punc ( i number-parse n char -- n/f )
{
{ CHAR: , [ [ @denom-digit ] require-next-digit ] }
{ CHAR: . [ ->mantissa ] }
[ [ @denom-digit ] or-exponent ]
} case ; inline
: @denom-digit ( i number-parse n char -- n/f )
{ fixnum number-parse integer fixnum } declare
digit-in-radix [ [ @denom-digit-or-punc ] add-digit ] [ @abort ] if ;
: @denom-first-digit ( i number-parse n char -- n/f )
{
{ CHAR: . [ ->mantissa ] }
[ @denom-digit ]
} case ; inline
: ->denominator ( i number-parse n -- n/f )
{ fixnum number-parse integer } declare
@split [ @denom-first-digit ] require-next-digit ?make-ratio ;
: @num-digit-or-punc ( i number-parse n char -- n/f )
{
{ CHAR: , [ [ @num-digit ] require-next-digit ] }
{ CHAR: / [ ->denominator ] }
[ @num-digit ]
} case ; inline
: @num-digit ( i number-parse n char -- n/f )
{ fixnum number-parse integer fixnum } declare
digit-in-radix [ [ @num-digit-or-punc ] add-digit ] [ @abort ] if ;
: ->numerator ( i number-parse n -- n/f )
{ fixnum number-parse integer } declare
@split [ @num-digit ] require-next-digit ?add-ratio ;
: @pos-digit-or-punc ( i number-parse n char -- n/f )
{
{ CHAR: , [ [ @pos-digit ] require-next-digit ] }
{ CHAR: + [ ->numerator ] }
{ CHAR: / [ ->denominator ] }
{ CHAR: . [ ->mantissa ] }
[ [ @pos-digit ] or-exponent ]
} case ; inline
: @pos-digit ( i number-parse n char -- n/f )
{ fixnum number-parse integer fixnum } declare
digit-in-radix [ [ @pos-digit-or-punc ] add-digit ] [ @abort ] if ;
: ->radix ( i number-parse n quot radix -- i number-parse n quot )
[ >>radix ] curry 2dip ; inline
: with-radix-char ( i number-parse n radix-quot nonradix-quot -- n/f )
[
rot {
{ [ dup "bB" member-eq? ] [ 2drop 2 ->radix require-next-digit ] }
{ [ dup "oO" member-eq? ] [ 2drop 8 ->radix require-next-digit ] }
{ [ dup "xX" member-eq? ] [ 2drop 16 ->radix require-next-digit ] }
[ nipd swap call ]
} cond
] 2curry next-digit ; inline
: @pos-first-digit ( i number-parse n char -- n/f )
{
{ CHAR: . [ ->required-mantissa ] }
{ CHAR: 0 [ [ @pos-digit ] [ @pos-digit-or-punc ] with-radix-char ] }
[ @pos-digit ]
} case ; inline
: @neg-digit-or-punc ( i number-parse n char -- n/f )
{
{ CHAR: , [ [ @neg-digit ] require-next-digit ] }
{ CHAR: - [ ->numerator ] }
{ CHAR: / [ ->denominator ] }
{ CHAR: . [ ->mantissa ] }
[ [ @neg-digit ] or-exponent ]
} case ; inline
: @neg-digit ( i number-parse n char -- n/f )
{ fixnum number-parse integer fixnum } declare
digit-in-radix [ [ @neg-digit-or-punc ] add-digit ] [ @abort ] if ;
: @neg-first-digit ( i number-parse n char -- n/f )
{
{ CHAR: . [ ->required-mantissa ] }
{ CHAR: 0 [ [ @neg-digit ] [ @neg-digit-or-punc ] with-radix-char ] }
[ @neg-digit ]
} case ; inline
: @first-char ( i number-parse n char -- n/f )
{
{ CHAR: - [ [ @neg-first-digit ] require-next-digit ?neg ] }
{ CHAR: + [ [ @pos-first-digit ] require-next-digit ] }
[ @pos-first-digit ]
} case ; inline
: @neg-first-digit-no-radix ( i number-parse n char -- n/f )
{
{ CHAR: . [ ->required-mantissa ] }
[ @neg-digit ]
} case ; inline
: @pos-first-digit-no-radix ( i number-parse n char -- n/f )
{
{ CHAR: . [ ->required-mantissa ] }
[ @pos-digit ]
} case ; inline
: @first-char-no-radix ( i number-parse n char -- n/f )
{
{ CHAR: - [ [ @neg-first-digit-no-radix ] require-next-digit ?neg ] }
{ CHAR: + [ [ @pos-first-digit-no-radix ] require-next-digit ] }
[ @pos-first-digit-no-radix ]
} case ; inline
PRIVATE>
: string>number ( str -- n/f )
10 <number-parse> [ @first-char ] require-next-digit ;
: base> ( str radix -- n/f )
<number-parse> [ @first-char-no-radix ] require-next-digit ;
: bin> ( str -- n/f ) 2 base> ; inline
: oct> ( str -- n/f ) 8 base> ; inline
: dec> ( str -- n/f ) 10 base> ; inline
: hex> ( str -- n/f ) 16 base> ; inline
<PRIVATE
CONSTANT: TENS B{
48 48 48 48 48 48 48 48 48 48 49 49 49 49 49 49 49 49 49 49
50 50 50 50 50 50 50 50 50 50 51 51 51 51 51 51 51 51 51 51
52 52 52 52 52 52 52 52 52 52 53 53 53 53 53 53 53 53 53 53
54 54 54 54 54 54 54 54 54 54 55 55 55 55 55 55 55 55 55 55
56 56 56 56 56 56 56 56 56 56 57 57 57 57 57 57 57 57 57 57
}
CONSTANT: ONES B{
48 49 50 51 52 53 54 55 56 57 48 49 50 51 52 53 54 55 56 57
48 49 50 51 52 53 54 55 56 57 48 49 50 51 52 53 54 55 56 57
48 49 50 51 52 53 54 55 56 57 48 49 50 51 52 53 54 55 56 57
48 49 50 51 52 53 54 55 56 57 48 49 50 51 52 53 54 55 56 57
48 49 50 51 52 53 54 55 56 57 48 49 50 51 52 53 54 55 56 57
}
: (two-digit) ( num accum -- num' accum )
[
100 /mod [ TENS nth-unsafe ] [ ONES nth-unsafe ] bi
] dip [ push ] keep [ push ] keep ; inline
: (one-digit) ( num accum -- num' accum )
[ 10 /mod CHAR: 0 + ] dip [ push ] keep ; inline
: (bignum>dec) ( num accum -- num' accum )
[ over most-positive-fixnum > ]
[ { bignum sbuf } declare (two-digit) ] while
[ >fixnum ] dip ; inline
: (fixnum>dec) ( num accum -- num' accum )
{ fixnum sbuf } declare
[ over 10 >= ] [ (two-digit) ] while
[ over zero? ] [ (one-digit) ] until ; inline
GENERIC: (positive>dec) ( num -- str )
M: bignum (positive>dec)
12 <sbuf> (bignum>dec) (fixnum>dec) "" like reverse! nip ; inline
: (count-digits) ( digits n -- digits' )
{
{ [ dup 10 < ] [ drop ] }
{ [ dup 100 < ] [ drop 1 fixnum+fast ] }
{ [ dup 1,000 < ] [ drop 2 fixnum+fast ] }
[
dup 1,000,000,000,000 < [
dup 100,000,000 < [
dup 1,000,000 < [
dup 10,000 < [
drop 3
] [
100,000 >= 5 4 ?
] if
] [
10,000,000 >= 7 6 ?
] if
] [
dup 10,000,000,000 < [
1,000,000,000 >= 9 8 ?
] [
100,000,000,000 >= 11 10 ?
] if
] if fixnum+fast
] [
[ 12 fixnum+fast ] [ 1,000,000,000,000 /i ] bi*
(count-digits)
] if
]
} cond ; inline recursive
M: fixnum (positive>dec)
1 over (count-digits) <sbuf> (fixnum>dec) "" like reverse! nip ; inline
: (positive>base) ( num radix -- str )
dup 1 <= [ invalid-radix ] when
[ dup 0 > ] swap [ /mod >digit ] curry "" produce-as nip
reverse! ; inline
: positive>base ( num radix -- str )
dup 10 = [ drop (positive>dec) ] [ (positive>base) ] if ; inline
PRIVATE>
GENERIC#: >base 1 ( n radix -- str )
: number>string ( n -- str ) 10 >base ; inline
: >bin ( n -- str ) 2 >base ; inline
: >oct ( n -- str ) 8 >base ; inline
: >hex ( n -- str ) 16 >base ; inline
M: integer >base
{
{ [ over 0 = ] [ 2drop "0" ] }
{ [ over 0 > ] [ positive>base ] }
[ [ neg ] dip positive>base CHAR: - prefix ]
} cond ;
M: ratio >base
[ >fraction [ /mod ] keep ] [ [ >base ] curry tri@ ] bi*
"/" glue over first-unsafe {
{ CHAR: 0 [ nip ] }
{ CHAR: - [ append ] }
[ drop "+" glue ]
} case ;
<PRIVATE
: (fix-float) ( str-no-exponent -- newstr )
CHAR: . over member? [ ".0" append ] unless ; inline
: fix-float ( str exponent-char -- newstr )
over index [
cut [ (fix-float) ] dip append
] [ (fix-float) ] if* ; inline
: mantissa-expt-normalize ( mantissa expt -- mantissa' expt' )
[ dup log2 52 swap - [ shift 52 2^ 1 - bitand ] [ 1022 + neg ] bi ]
[ 1023 - ] if-zero ;
: mantissa-expt ( float -- mantissa expt )
[ 52 2^ 1 - bitand ]
[ -0.0 double>bits bitnot bitand -52 shift ] bi
mantissa-expt-normalize ;
: bin-float-sign ( bits -- str )
-0.0 double>bits bitand zero? "" "-" ? ;
: bin-float-value ( str size -- str' )
CHAR: 0 pad-head [ CHAR: 0 = ] trim-tail
[ "0" ] when-empty "1." prepend ;
: float>hex-value ( mantissa -- str )
>hex 13 bin-float-value ;
: float>oct-value ( mantissa -- str )
4 * >oct 18 bin-float-value ;
: float>bin-value ( mantissa -- str )
>bin 52 bin-float-value ;
: bin-float-expt ( mantissa -- str )
10 >base "p" prepend ;
: (bin-float>base) ( value-quot n -- str )
double>bits
[ bin-float-sign swap ] [
mantissa-expt rot [ bin-float-expt ] bi*
] bi 3append ; inline
: bin-float>base ( n base -- str )
{
{ 16 [ [ float>hex-value ] swap (bin-float>base) ] }
{ 8 [ [ float>oct-value ] swap (bin-float>base) ] }
{ 2 [ [ float>bin-value ] swap (bin-float>base) ] }
[ invalid-radix ]
} case ;
: format-string ( format -- format )
0 suffix >byte-array ; foldable
: format-float ( n fill width precision format locale -- string )
[
[ format-string ] 4dip [ format-string ] bi@ (format-float)
>string
] [
"C" = [ [ "G" = ] [ "E" = ] bi or CHAR: E CHAR: e ? fix-float ]
[ drop ] if
] 2bi ; inline
: float>base ( n radix -- str )
{
{ 10 [ "" -1 16 "" "C" format-float ] }
[ bin-float>base ]
} case ; inline
PRIVATE>
M: float >base
{
{ [ over fp-nan? ] [ 2drop "0/0." ] }
{ [ over 1/0. = ] [ 2drop "1/0." ] }
{ [ over -1/0. = ] [ 2drop "-1/0." ] }
{ [ over 0.0 fp-bitwise= ] [ 2drop "0.0" ] }
{ [ over -0.0 fp-bitwise= ] [ 2drop "-0.0" ] }
[ float>base ]
} cond ;
: # ( n -- ) number>string % ; inline
: hex-string>bytes ( hex-string -- bytes )
dup length 2/ <byte-array> [
[
[ digit> ] 2dip over even? [
[ 16 * ] [ 2/ ] [ set-nth ] tri*
] [
[ 2/ ] [ [ + ] change-nth ] bi*
] if
] curry each-index
] keep ;
: bytes>hex-string ( bytes -- hex-string )
dup length 2 * CHAR: 0 <string> [
[
[ 16 /mod [ >digit ] bi@ ]
[ 2 * dup 1 + ]
[ [ set-nth ] curry bi-curry@ bi* ] tri*
] curry each-index
] keep ;