fixnum-shift intrinsics

2005-05-10 02:34:47 +00:00 · 2005-05-10 02:34:47 +00:00 · 910812b502
parent 4face990d7
commit 910812b502
16 changed files with 195 additions and 37 deletions
--- a/library/compiler/intrinsics.factor
+++ b/library/compiler/intrinsics.factor
@ -69,10 +69,12 @@ sequences words ;
    out-1
 ] "linearizer" set-word-prop

-: top-literal? ( seq -- ? ) peek literal? ;
+: node-peek ( node -- obj ) node-consume-d swap hash peek ;
+
 : peek-2 dup length 2 - swap nth ;
-: next-typed? ( seq -- ? )
-    peek-2 value-types length 1 = ;
+: node-peek-2 ( node -- obj ) node-consume-d swap hash peek-2 ;
+
+: typed? ( value -- ? ) value-types length 1 = ;

 : self ( word -- )
    f swap dup "infer-effect" word-prop (consume/produce) ;
@ -82,14 +84,19 @@ sequences words ;

 \ slot intrinsic

-: slot@ ( seq -- n )
+: slot@ ( node -- n )
    #! Compute slot offset.
+    node-consume-d swap hash
    dup peek literal-value cell *
    swap peek-2 value-types car type-tag - ;

+: typed-literal? ( node -- ? )
+    #! Output if the node's first input is well-typed, and the
+    #! second is a literal.
+    dup node-peek literal? swap node-peek-2 typed? and ;
+
 \ slot [
-    node-consume-d swap hash
-    dup top-literal? over next-typed? and [
+    dup typed-literal? [
        1 %dec-d ,
        in-1
        0 swap slot@ %fast-slot ,
@ -105,8 +112,7 @@ sequences words ;
 \ set-slot intrinsic

 \ set-slot [
-    node-consume-d swap hash
-    dup top-literal? over next-typed? and [
+    dup typed-literal? [
        1 %dec-d ,
        in-2
        2 %dec-d ,
@ -149,11 +155,10 @@ sequences words ;

 : binary-op ( node op out -- )
    #! out is a vreg where the vop stores the result.
-    >r >r node-consume-d swap hash
-    dup top-literal? [
+    >r >r node-peek dup literal? [
        1 %dec-d ,
        in-1
-        peek literal-value 0 <vreg> r> execute ,
+        literal-value 0 <vreg> r> execute ,
        r> 0 %replace-d ,
    ] [
        drop
@ -166,7 +171,6 @@ sequences words ;
    [[ fixnum-bitand %fixnum-bitand ]]
    [[ fixnum-bitor  %fixnum-bitor  ]]
    [[ fixnum-bitxor %fixnum-bitxor ]]
-    [[ fixnum-shift  %fixnum-shift  ]]
    [[ fixnum<=      %fixnum<=      ]]
    [[ fixnum<       %fixnum<       ]]
    [[ fixnum>=      %fixnum>=      ]]
@ -181,7 +185,19 @@ sequences words ;
 \ fixnum* intrinsic

 \ fixnum* [
-    drop \ %fixnum* 0 binary-op-reg
+    ! Turn multiplication by a power of two into a left shift.
+    node-peek dup literal? [
+        literal-value dup power-of-2? [
+            1 %dec-d ,
+            in-1
+            log2 0 <vreg> %fixnum<< ,
+            0 0 %replace-d ,
+        ] [
+            drop binary-op-reg
+        ] ifte
+    ] [
+        drop binary-op-reg
+    ] ifte
 ] "linearizer" set-word-prop

 \ fixnum-mod intrinsic
@ -218,3 +234,48 @@ sequences words ;
    0 %fixnum-bitnot ,
    out-1
 ] "linearizer" set-word-prop
+
+: slow-shift ( -- ) \ fixnum-shift %call , ;
+
+: negative-shift ( n -- )
+    1 %dec-d ,
+    in-1
+    dup cell -8 * <= [
+        drop 0 <vreg> 2 <vreg> %fixnum-sgn ,
+        2 0 %replace-d ,
+    ] [
+        neg 0 <vreg> %fixnum>> ,
+        out-1
+    ] ifte ;
+
+: positive-shift ( n -- )
+    dup cell 8 * tag-bits - <= [
+        1 %dec-d ,
+        in-1
+        0 <vreg> %fixnum<< ,
+        out-1
+    ] [
+        drop slow-shift
+    ] ifte ;
+
+: fast-shift ( n -- )
+    dup 0 = [
+        1 %dec-d ,
+        drop
+    ] [
+        dup 0 < [
+            negative-shift
+        ] [
+            positive-shift
+        ] ifte
+    ] ifte ;
+
+\ fixnum-shift intrinsic
+
+\ fixnum-shift [
+    node-peek dup literal? [
+        literal-value fast-shift
+    ] [
+        drop slow-shift
+    ] ifte
+] "linearizer" set-word-prop
--- a/library/compiler/simplifier.factor
+++ b/library/compiler/simplifier.factor
@ -191,7 +191,7 @@ M: %call-label simplify-node ( linear vop -- ? )
 : dead-code ( linear -- linear ? )
    uncons (dead-code) >r cons r> ;

-M: %jump-label simplify-node ( linear vop -- ? )
+M: %jump-label simplify-node ( linear vop -- linear ? )
    drop
    \ %return dup double-jump [
        t
@ -211,7 +211,6 @@ M: %jump-label simplify-node ( linear vop -- ? )
            ! ] ifte
        ] ifte
    ] ifte ;
-
 ! 
 ! #jump-label [
 !     [ #return #return double-jump ]
--- a/library/compiler/vops.factor
+++ b/library/compiler/vops.factor
@ -142,7 +142,6 @@ VOP: %fixnum-bitand : %fixnum-bitand src/dest-vop <%fixnum-bitand> ;
 VOP: %fixnum-bitor  : %fixnum-bitor src/dest-vop <%fixnum-bitor> ;
 VOP: %fixnum-bitxor : %fixnum-bitxor src/dest-vop <%fixnum-bitxor> ;
 VOP: %fixnum-bitnot : %fixnum-bitnot <vreg> dest-vop <%fixnum-bitnot> ;
-VOP: %fixnum-shift  : %fixnum-shift src/dest-vop <%fixnum-shift> ;

 VOP: %fixnum<=      : %fixnum<= src/dest-vop <%fixnum<=> ;
 VOP: %fixnum<       : %fixnum< src/dest-vop <%fixnum<> ;
@ -150,6 +149,22 @@ VOP: %fixnum>=      : %fixnum>= src/dest-vop <%fixnum>=> ;
 VOP: %fixnum>       : %fixnum> src/dest-vop <%fixnum>> ;
 VOP: %eq?           : %eq? src/dest-vop <%eq?> ;

+! At the VOP level, the 'shift' operation is split into five
+! distinct operations:
+! - shifts with a large positive count: calls runtime to make
+!   a bignum
+! - shifts with a small positive count: %fixnum<<
+! - shifts with a small negative count: %fixnum>>
+! - shifts with a small negative count: %fixnum>>
+! - shifts with a large negative count: %fixnum-sgn
+VOP: %fixnum<<   : %fixnum<<   src/dest-vop <%fixnum<<> ;
+VOP: %fixnum>>   : %fixnum>>   src/dest-vop <%fixnum>>> ;
+! due to x86 limitations the destination of this VOP must be
+! vreg 2 (EDX), and the source must be vreg 0 (EAX).
+VOP: %fixnum-sgn : %fixnum-sgn src/dest-vop <%fixnum-sgn> ;
+
+! Integer comparison followed by a conditional branch is
+! optimized
 VOP: %jump-fixnum<= : %jump-fixnum<= f swap <%jump-fixnum<=> ;
 VOP: %jump-fixnum<  : %jump-fixnum< f swap <%jump-fixnum<> ;
 VOP: %jump-fixnum>= : %jump-fixnum>= f swap <%jump-fixnum>=> ;
--- a/library/compiler/x86/assembler.factor
+++ b/library/compiler/x86/assembler.factor
@ -202,7 +202,7 @@ M: word JUMPcc ( opcode addr -- )
 : JNO HEX: 81 swap JUMPcc ;
 : JB  HEX: 82 swap JUMPcc ;
 : JAE HEX: 83 swap JUMPcc ;
-: JE  HEX: 84 swap JUMPcc ;
+: JE  HEX: 84 swap JUMPcc ; ! aka JZ
 : JNE HEX: 85 swap JUMPcc ;
 : JBE HEX: 86 swap JUMPcc ;
 : JA  HEX: 87 swap JUMPcc ;
@ -260,12 +260,14 @@ M: operand CMP OCT: 071 2-operand ;

 : CDQ HEX: 99 compile-byte ;

+: ROL ( dst n -- ) HEX: c1 BIN: 000 immediate-8 ;
+: ROR ( dst n -- ) HEX: c1 BIN: 001 immediate-8 ;
+: RCL ( dst n -- ) HEX: c1 BIN: 010 immediate-8 ;
+: RCR ( dst n -- ) HEX: c1 BIN: 011 immediate-8 ;
 : SHL ( dst n -- ) HEX: c1 BIN: 100 immediate-8 ;
 : SHR ( dst n -- ) HEX: c1 BIN: 101 immediate-8 ;
 : SAR ( dst n -- ) HEX: c1 BIN: 111 immediate-8 ;

-: RCR ( dst -- ) HEX: d1 compile-byte BIN: 011 1-operand ;
-
 : LEA ( dst src -- )
    HEX: 8d compile-byte swap register 1-operand ;

--- a/library/compiler/x86/fixnum.factor
+++ b/library/compiler/x86/fixnum.factor
@ -12,7 +12,7 @@ memory namespaces words ;
    "end" get JNO
    ! There was an overflow. Untag the fixnum and add the carry.
    ! Thanks to Dazhbog for figuring out this trick.
-    dup RCR
+    dup 1 RCR
    dup 2 SAR
    ! Create a bignum
    PUSH
@ -36,7 +36,6 @@ M: %fixnum* generate-node ( vop -- )
    ECX IMUL
    <label> "end" set
    "end" get JNO
-    ! make a bignum
    EDX PUSH
    EAX PUSH
    "s48_long_long_to_bignum" f compile-c-call
@ -70,7 +69,7 @@ M: %fixnum-mod generate-node ( vop -- )
    ECX EAX MOV
    ! Tag the value, since division cancelled tags from both
    ! inputs
-    EAX 3 SHL
+    EAX tag-bits SHL
    ! Did it overflow?
    "end" get JNO
    ! There was an overflow, so make ECX into a bignum. we must
@ -80,7 +79,7 @@ M: %fixnum-mod generate-node ( vop -- )
    "s48_long_to_bignum" f compile-c-call
    ! An untagged pointer to the bignum is now in EAX; tag it
    EAX bignum-tag OR
-    ESP 4 ADD
+    ESP cell ADD
    ! the remainder is now in EDX
    EDX POP
    "end" get save-xt ;
@ -101,6 +100,47 @@ M: %fixnum-bitnot generate-node ( vop -- )
    ! Mask off the low 3 bits to give a fixnum tag
    tag-mask XOR ;

+M: %fixnum<< generate-node
+    ! This has specific register requirements.
+    <label> "no-overflow" set
+    <label> "end" set
+    ! make a copy
+    ECX EAX MOV
+    vop-source
+    ! check for potential overflow
+    1 over cell 8 * swap 1 - - shift ECX over ADD
+    2 * 1 - ECX swap CMP
+    ! is there going to be an overflow?
+    "no-overflow" get JBE
+    ! there is going to be an overflow, make a bignum
+    EAX tag-bits SAR
+    dup ( n) PUSH
+    EAX PUSH
+    "s48_long_to_bignum" f compile-c-call
+    EDX POP
+    EAX PUSH
+    "s48_bignum_arithmetic_shift" f compile-c-call
+    ! tag the result
+    EAX bignum-tag OR
+    ESP cell 2 * ADD
+    "end" get JMP
+    ! there is not going to be an overflow
+    "no-overflow" get save-xt
+    EAX swap SHL
+    "end" get save-xt ;
+
+M: %fixnum>> generate-node
+    ! shift register
+    dup vop-dest v>operand dup rot vop-source SAR
+    ! give it a fixnum tag
+    tag-mask bitnot AND ;
+
+M: %fixnum-sgn generate-node
+    ! store 0 in EDX if EAX is >=0, otherwise store -1.
+    CDQ
+    ! give it a fixnum tag.
+    vop-dest v>operand tag-bits SHL ;
+
 : conditional ( dest cond -- )
    #! Compile this after a conditional jump to store f or t
    #! in dest depending on the jump being taken or not.
--- a/library/compiler/x86/generator.factor
+++ b/library/compiler/x86/generator.factor
@ -141,9 +141,9 @@ M: %arithmetic-type generate-node ( vop -- )
    ECX [ ESI ] MOV
    ! Compute their tags
    EAX BIN: 111 AND
-    EDX BIN: 111 AND
+    ECX BIN: 111 AND
    ! Are the tags equal?
-    EAX EDX CMP
+    EAX ECX CMP
    "end" get JE
    ! No, they are not equal. Call a runtime function to
    ! coerce the integers to a higher type.
--- a/library/math/math.factor
+++ b/library/math/math.factor
@ -81,4 +81,13 @@ GENERIC: abs ( z -- |z| )
        rot [ [ rot dup slip -rot ] repeat ] keep -rot
    ] repeat 2drop ; inline

-: power-of-2? ( n -- ? ) dup dup neg bitand = ;
+: power-of-2? ( n -- ? )
+    dup 0 > [
+        dup dup neg bitand =
+    ] [
+        drop f
+    ] ifte ;
+
+: log2 ( n -- b )
+    #! Log base two for integers.
+    dup 1 = [ drop 0 ] [ 2 /i log2 1 + ] ifte ;
--- a/library/test/benchmark/fib.factor
+++ b/library/test/benchmark/fib.factor
@ -35,4 +35,4 @@ TUPLE: box i ;
        swap box-i swap box-i + <box>
    ] ifte ; compiled

-[ << box f 9227465 ] [ << box f 34 >> tuple-fib ] unit-test
+[ << box f 9227465 >> ] [ << box f 34 >> tuple-fib ] unit-test
--- a/library/test/compiler/intrinsics.factor
+++ b/library/test/compiler/intrinsics.factor
@ -48,6 +48,28 @@ math-internals test words ;
 [ 11 ] [ 12 [ 7 fixnum-bitxor ] compile-1 ] unit-test
 [ 11 ] [ [ 12 7 fixnum-bitxor ] compile-1 ] unit-test

+[ 4294967296 ] [ 1 32 [ fixnum-shift ] compile-1 ] unit-test
+[ 4294967296 ] [ 1 [ 32 fixnum-shift ] compile-1 ] unit-test
+[ 4294967296 ] [ 1 [ 16 fixnum-shift 16 fixnum-shift ] compile-1 ] unit-test
+[ -4294967296 ] [ -1 32 [ fixnum-shift ] compile-1 ] unit-test
+[ -4294967296 ] [ -1 [ 32 fixnum-shift ] compile-1 ] unit-test
+[ -4294967296 ] [ -1 [ 16 fixnum-shift 16 fixnum-shift ] compile-1 ] unit-test
+
+[ 8 ] [ 1 3 [ fixnum-shift ] compile-1 ] unit-test
+[ 8 ] [ 1 [ 3 fixnum-shift ] compile-1 ] unit-test
+[ 8 ] [ [ 1 3 fixnum-shift ] compile-1 ] unit-test
+[ -8 ] [ -1 3 [ fixnum-shift ] compile-1 ] unit-test
+[ -8 ] [ -1 [ 3 fixnum-shift ] compile-1 ] unit-test
+[ -8 ] [ [ -1 3 fixnum-shift ] compile-1 ] unit-test
+
+[ 2 ] [ 8 -2 [ fixnum-shift ] compile-1 ] unit-test
+[ 2 ] [ 8 [ -2 fixnum-shift ] compile-1 ] unit-test
+
+[ 0 ] [ [ 123 -64 fixnum-shift ] compile-1 ] unit-test
+[ 0 ] [ 123 -64 [ fixnum-shift ] compile-1 ] unit-test
+[ -1 ] [ [ -123 -64 fixnum-shift ] compile-1 ] unit-test
+[ -1 ] [ -123 -64 [ fixnum-shift ] compile-1 ] unit-test
+
 [ f ] [ 12 7 [ fixnum< ] compile-1 ] unit-test
 [ f ] [ 12 [ 7 fixnum< ] compile-1 ] unit-test
 [ f ] [ [ 12 7 fixnum< ] compile-1 ] unit-test
--- a/library/test/math/integer.factor
+++ b/library/test/math/integer.factor
@ -80,3 +80,8 @@ unit-test
 [ 1/8 ] [ 1/2 3 ^ ] unit-test
 [ 1/8 ] [ 2 -3 ^ ] unit-test
 [ t ] [ 1 100 shift 2 100 ^ = ] unit-test
+
+[ t ] [ 256 power-of-2? ] unit-test
+[ f ] [ 123 power-of-2? ] unit-test
+[ 8 ] [ 256 log2 ] unit-test
+[ 0 ] [ 1 log2 ] unit-test
--- a/library/test/test.factor
+++ b/library/test/test.factor
@ -4,7 +4,7 @@ IN: test
 USING: errors kernel lists math memory namespaces parser
 prettyprint sequences stdio strings unparser vectors words ;

-TUPLE: assert expect got ;
+TUPLE: assert got expect ;
 M: assert error.
    "Assertion failed" print
    "Expected: " write dup assert-expect .
--- a/native/compiler.h
+++ b/native/compiler.h
@ -10,8 +10,6 @@ typedef struct

 ZONE compiling;

-#define LITERAL_TABLE 4096
-
 CELL literal_top;
 CELL literal_max;

--- a/native/factor.c
+++ b/native/factor.c
@ -1,13 +1,13 @@
 #include "factor.h"

 void init_factor(char* image, CELL ds_size, CELL cs_size,
-	CELL data_size, CELL code_size)
+	CELL data_size, CELL code_size, CELL literal_size)
 {
 	srand((unsigned)time(NULL)); /* initialize random number generator */
 	init_ffi();
 	init_arena(data_size);
 	init_compiler(code_size);
-	load_image(image);
+	load_image(image,literal_size);
 	init_stacks(ds_size,cs_size);
 	init_c_io();
 	init_signals();
@ -34,6 +34,7 @@ int main(int argc, char** argv)
 	CELL cs_size = 2048;
 	CELL data_size = 16;
 	CELL code_size = 2;
+	CELL literal_size = 64;
 	CELL args;
 	CELL i;

@ -45,6 +46,7 @@ int main(int argc, char** argv)
 		printf(" +Cn   Call stack size, kilobytes\n");
 		printf(" +Mn   Data heap size, megabytes\n");
 		printf(" +Xn   Code heap size, megabytes\n");
+		printf(" +Ln   Literal table size, kilobytes. Only for bootstrapping\n");
 		printf("Other options are handled by the Factor library.\n");
 		printf("See the documentation for details.\n");
 		printf("Send bug reports to Slava Pestov <slava@jedit.org>.\n");
@ -57,6 +59,7 @@ int main(int argc, char** argv)
 		if(factor_arg(argv[i],"+C%d",&cs_size)) continue;
 		if(factor_arg(argv[i],"+M%d",&data_size)) continue;
 		if(factor_arg(argv[i],"+X%d",&code_size)) continue;
+		if(factor_arg(argv[i],"+L%d",&literal_size)) continue;

 		if(strncmp(argv[i],"+",1) == 0)
 		{
@ -69,7 +72,8 @@ int main(int argc, char** argv)
 		ds_size * 1024,
 		cs_size * 1024,
 		data_size * 1024 * 1024,
-		code_size * 1024 * 1024);
+		code_size * 1024 * 1024,
+		literal_size * 1024);

 	args = F;
 	while(--argc != 0)
--- a/native/fixnum.c
+++ b/native/fixnum.c
@ -32,6 +32,9 @@ void primitive_to_fixnum(void)
 	drepl(tag_fixnum(to_fixnum(dpeek())));
 }

+/* The fixnum arithmetic operations defined in C are relatively slow.
+The Factor compiler has optimized assembly intrinsics for all these
+operations. */
 void primitive_fixnum_add(void)
 {
 	F_FIXNUM y = untag_fixnum_fast(dpop());
--- a/native/image.c
+++ b/native/image.c
@ -1,6 +1,6 @@
 #include "factor.h"

-void load_image(char* filename)
+void load_image(char* filename, int literal_table)
 {
 	FILE* file;
 	HEADER h;
@ -24,9 +24,9 @@ void load_image(char* filename)
 			fread(&ext_h,sizeof(HEADER_2)/sizeof(CELL),sizeof(CELL),file);
 		else if(h.version == IMAGE_VERSION_0)
 		{
-			ext_h.size = LITERAL_TABLE;
+			ext_h.size = literal_table;
 			ext_h.literal_top = 0;
-			ext_h.literal_max = LITERAL_TABLE;
+			ext_h.literal_max = literal_table;
 			ext_h.relocation_base = compiling.base;
 		}
 		else
--- a/native/image.h
+++ b/native/image.h
@ -28,6 +28,6 @@ typedef struct EXT_HEADER {
 	CELL literal_max;
 } HEADER_2;

-void load_image(char* file);
+void load_image(char* file, int literal_size);
 bool save_image(char* file);
 void primitive_save_image(void);