compiler: new "binary literal area" at the end of a word's machine code stores constant floats and SIMD vectors; this allows ##load-reference/##load-memory fusion to be performed on x86-64, with a RIP-relative address reaching the data; also simplifies VM since custom relocation types used by the previous 32-bit-only optimization are no longer needed

db4
Slava Pestov 2010-05-03 17:23:03 -04:00
parent 9b4135713e
commit 58e4381eb1
17 changed files with 175 additions and 143 deletions

View File

@ -46,14 +46,14 @@ M: ##load-integer optimize-insn
! if the architecture supports it
: convert-to-load-double? ( insn -- ? )
{
[ drop object-immediates? ]
[ drop fused-unboxing? ]
[ dst>> rep-of double-rep? ]
[ obj>> float? ]
} 1&& ;
: convert-to-load-vector? ( insn -- ? )
{
[ drop object-immediates? ]
[ drop fused-unboxing? ]
[ dst>> rep-of vector-rep? ]
[ obj>> byte-array? ]
} 1&& ;

View File

@ -1,13 +1,14 @@
! Copyright (C) 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors compiler.cfg.instructions
USING: accessors cpu.architecture kernel
compiler.cfg.instructions
compiler.cfg.value-numbering.graph
compiler.cfg.value-numbering.rewrite cpu.architecture kernel ;
compiler.cfg.value-numbering.rewrite ;
IN: compiler.cfg.value-numbering.misc
M: ##replace rewrite
object-immediates? [
[ loc>> ] [ src>> vreg>insn ] bi dup literal-insn?
[ insn>literal swap \ ##replace-imm new-insn ]
[ 2drop f ] if
] [ drop f ] if ;
[ loc>> ] [ src>> vreg>insn ] bi
dup literal-insn? [
insn>literal dup immediate-store?
[ swap \ ##replace-imm new-insn ] [ 2drop f ] if
] [ 2drop f ] if ;

View File

@ -60,6 +60,74 @@ IN: compiler.cfg.value-numbering.tests
} value-numbering-step
] unit-test
! ##load-reference/##replace fusion
cpu x86? [
[
{
T{ ##load-integer f 0 10 }
T{ ##replace-imm f 10 D 0 }
}
] [
{
T{ ##load-integer f 0 10 }
T{ ##replace f 0 D 0 }
} value-numbering-step
] unit-test
[
{
T{ ##load-reference f 0 f }
T{ ##replace-imm f f D 0 }
}
] [
{
T{ ##load-reference f 0 f }
T{ ##replace f 0 D 0 }
} value-numbering-step
] unit-test
] when
cpu x86.32? [
[
{
T{ ##load-reference f 0 + }
T{ ##replace-imm f 10 D + }
}
] [
{
T{ ##load-reference f 0 + }
T{ ##replace f 0 D 0 }
} value-numbering-step
] unit-test
] when
cpu x86.64? [
[
{
T{ ##load-integer f 0 10,000,000,000 }
T{ ##replace f 0 D 0 }
}
] [
{
T{ ##load-integer f 0 10,000,000,000 }
T{ ##replace f 0 D 0 }
} value-numbering-step
] unit-test
! Boundary case
[
{
T{ ##load-integer f 0 HEX: 7fffffff }
T{ ##replace f 0 D 0 }
}
] [
{
T{ ##load-integer f 0 HEX: 7fffffff }
T{ ##replace f 0 D 0 }
} value-numbering-step
] unit-test
] when
! Double compare elimination
[
{

View File

@ -7,6 +7,15 @@ system combinators math.bitwise math.order generalizations
accessors growable fry compiler.constants memoize ;
IN: compiler.codegen.fixup
! Utilities
: push-uint ( value vector -- )
[ length ] [ B{ 0 0 0 0 } swap push-all ] [ underlying>> ] tri
swap set-alien-unsigned-4 ;
: push-double ( value vector -- )
[ length ] [ B{ 0 0 0 0 0 0 0 0 } swap push-all ] [ underlying>> ] tri
swap set-alien-double ;
! Owner
SYMBOL: compiling-word
@ -42,16 +51,18 @@ TUPLE: label-fixup { label label } { class integer } { offset integer } ;
! Relocation table
SYMBOL: relocation-table
: push-4 ( value vector -- )
[ length ] [ B{ 0 0 0 0 } swap push-all ] [ underlying>> ] tri
swap set-alien-unsigned-4 ;
: add-relocation-entry ( type class offset -- )
{ 0 24 28 } bitfield relocation-table get push-4 ;
{ 0 24 28 } bitfield relocation-table get push-uint ;
: rel-fixup ( class type -- )
swap compiled-offset add-relocation-entry ;
! Binary literal table
SYMBOL: binary-literal-table
: add-binary-literal ( obj -- label )
<label> [ 2array binary-literal-table get push ] keep ;
! Caching common symbol names reduces image size a bit
MEMO: cached-string>symbol ( symbol -- obj ) string>symbol ;
@ -73,11 +84,8 @@ MEMO: cached-string>symbol ( symbol -- obj ) string>symbol ;
: rel-literal ( literal class -- )
[ add-literal ] dip rt-literal rel-fixup ;
: rel-float ( literal class -- )
[ add-literal ] dip rt-float rel-fixup ;
: rel-byte-array ( literal class -- )
[ add-literal ] dip rt-byte-array rel-fixup ;
: rel-binary-literal ( literal class -- )
[ add-binary-literal ] dip label-fixup ;
: rel-this ( class -- )
rt-this rel-fixup ;
@ -95,20 +103,20 @@ MEMO: cached-string>symbol ( symbol -- obj ) string>symbol ;
rt-decks-offset rel-fixup ;
! And the rest
: resolve-offset ( label-fixup -- offset )
: compute-target ( label-fixup -- offset )
label>> offset>> [ "Unresolved label" throw ] unless* ;
: resolve-absolute-label ( label-fixup -- )
dup resolve-offset neg add-literal
[ rt-here ] dip [ class>> ] [ offset>> ] bi add-relocation-entry ;
: compute-relative-label ( label-fixup -- label )
[ class>> ] [ offset>> ] [ compute-target ] tri 3array ;
: resolve-relative-label ( label-fixup -- label )
[ class>> ] [ offset>> ] [ resolve-offset ] tri 3array ;
: compute-absolute-label ( label-fixup -- )
[ compute-target neg add-literal ]
[ [ rt-here ] dip [ class>> ] [ offset>> ] bi add-relocation-entry ] bi ;
: resolve-labels ( label-fixups -- labels' )
: compute-labels ( label-fixups -- labels' )
[ class>> rc-absolute? ] partition
[ [ resolve-absolute-label ] each ]
[ [ resolve-relative-label ] map concat ]
[ [ compute-absolute-label ] each ]
[ [ compute-relative-label ] map concat ]
bi* ;
: init-fixup ( word -- )
@ -116,13 +124,39 @@ MEMO: cached-string>symbol ( symbol -- obj ) string>symbol ;
V{ } clone parameter-table set
V{ } clone literal-table set
V{ } clone label-table set
BV{ } clone relocation-table set ;
BV{ } clone relocation-table set
V{ } clone binary-literal-table set ;
: alignment ( align -- n )
[ compiled-offset dup ] dip align swap - ;
: (align-code) ( n -- )
0 <repetition> % ;
: align-code ( n -- )
alignment (align-code) ;
GENERIC# emit-data 1 ( obj label -- )
M: float emit-data
8 align-code
resolve-label
building get push-double ;
M: byte-array emit-data
16 align-code
resolve-label
building get push-all ;
: emit-binary-literals ( -- )
binary-literal-table get [ emit-data ] assoc-each ;
: with-fixup ( word quot -- code )
'[
init-fixup
@
label-table [ resolve-labels ] change
emit-binary-literals
label-table [ compute-labels ] change
parameter-table get >array
literal-table get >array
relocation-table get >byte-array

View File

@ -67,8 +67,6 @@ CONSTANT: rt-vm 9
CONSTANT: rt-cards-offset 10
CONSTANT: rt-decks-offset 11
CONSTANT: rt-exception-handler 12
CONSTANT: rt-float 13
CONSTANT: rt-byte-array 14
: rc-absolute? ( n -- ? )
${

View File

@ -506,9 +506,9 @@ M: stack-params param-reg 2drop ;
! Does this architecture support %load-double, %load-vector and
! objects in %compare-imm?
HOOK: object-immediates? cpu ( -- ? )
HOOK: fused-unboxing? cpu ( -- ? )
M: object object-immediates? f ;
M: object fused-unboxing? f ;
! Can this value be an immediate operand for %add-imm, %sub-imm,
! or %mul-imm?
@ -522,9 +522,12 @@ HOOK: immediate-bitwise? cpu ( n -- ? )
! %compare-imm-branch?
HOOK: immediate-comparand? cpu ( n -- ? )
! Can this value be an immediate operand for %replace-imm?
HOOK: immediate-store? cpu ( obj -- ? )
M: object immediate-comparand? ( n -- ? )
{
{ [ dup integer? ] [ immediate-arithmetic? ] }
{ [ dup fixnum? ] [ tag-fixnum immediate-arithmetic? ] }
{ [ dup not ] [ drop t ] }
[ drop f ]
} cond ;

View File

@ -25,23 +25,13 @@ M: x86.32 stack-reg ESP ;
M: x86.32 frame-reg EBP ;
M: x86.32 temp-reg ECX ;
M: x86.32 object-immediates? ( -- ? ) t ;
M: x86.32 immediate-comparand? ( obj -- ? ) drop t ;
M: x86.32 %replace-imm ( src loc -- )
loc>operand swap
{
{ [ dup not ] [ drop \ f type-number MOV ] }
{ [ dup fixnum? ] [ tag-fixnum MOV ] }
[ [ HEX: ffffffff MOV ] dip rc-absolute rel-literal ]
} cond ;
M: x86.32 %load-double ( dst val -- )
[ 0 [] MOVSD ] dip rc-absolute rel-float ;
[ 0 [] MOVSD ] dip rc-absolute rel-binary-literal ;
M:: x86.32 %load-vector ( dst val rep -- )
dst 0 [] rep copy-memory* val rc-absolute rel-byte-array ;
dst 0 [] rep copy-memory* val rc-absolute rel-binary-literal ;
M: x86.32 %mov-vm-ptr ( reg -- )
0 MOV 0 rc-absolute-cell rel-vm ;
@ -81,9 +71,9 @@ M:: x86.32 %dispatch ( src temp -- )
temp HEX: 7f [+] JMP
building get length :> end
! Fix up the displacement above
cell code-alignment
cell alignment
[ end start - + building get dup pop* push ]
[ align-code ]
[ (align-code) ]
bi ;
M: x86.32 pic-tail-reg EDX ;

View File

@ -3,7 +3,7 @@
USING: accessors arrays kernel math namespaces make sequences
system layouts alien alien.c-types alien.accessors alien.libraries
slots splitting assocs combinators locals compiler.constants
compiler.codegen compiler.codegen.fixup
compiler.codegen compiler.codegen.alien compiler.codegen.fixup
compiler.cfg.instructions compiler.cfg.builder
compiler.cfg.intrinsics compiler.cfg.stack-frame
cpu.x86.assembler cpu.x86.assembler.operands cpu.x86
@ -46,6 +46,12 @@ M: x86.64 %mov-vm-ptr ( reg -- )
M: x86.64 %vm-field ( dst offset -- )
[ vm-reg ] dip [+] MOV ;
M: x86.64 %load-double ( dst val -- )
[ 0 [RIP+] MOVSD ] dip rc-relative rel-binary-literal ;
M:: x86.64 %load-vector ( dst val rep -- )
dst 0 [RIP+] rep copy-memory* val rc-relative rel-binary-literal ;
M: x86.64 %set-vm-field ( src offset -- )
[ vm-reg ] dip [+] swap MOV ;
@ -85,9 +91,9 @@ M:: x86.64 %dispatch ( src temp -- )
temp HEX: 7f [+] JMP
building get length :> end
! Fix up the displacement above
cell code-alignment
cell alignment
[ end start - + building get dup pop* push ]
[ align-code ]
[ (align-code) ]
bi ;
M: stack-params copy-register*

View File

@ -1,9 +1,9 @@
! Copyright (C) 2008 Slava Pestov.
! Copyright (C) 2008, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays sequences math splitting make assocs kernel
layouts system alien.c-types classes.struct cpu.architecture
cpu.x86.assembler cpu.x86.assembler.operands cpu.x86 compiler.codegen
compiler.cfg.registers ;
USING: accessors arrays sequences math splitting make assocs
kernel layouts system alien.c-types classes.struct
cpu.architecture cpu.x86.assembler cpu.x86.assembler.operands
cpu.x86 compiler.codegen.alien compiler.cfg.registers ;
IN: cpu.x86.64.unix
M: int-regs param-regs

View File

@ -70,6 +70,10 @@ HOOK: pic-tail-reg cpu ( -- reg )
M: x86 complex-addressing? t ;
M: x86 fused-unboxing? ( -- ? ) t ;
M: x86 immediate-store? ( obj -- ? ) immediate-comparand? ;
M: x86 %load-immediate dup 0 = [ drop dup XOR ] [ MOV ] if ;
M: x86 %load-reference
@ -88,7 +92,17 @@ M: ds-loc loc>operand n>> ds-reg reg-stack ;
M: rs-loc loc>operand n>> rs-reg reg-stack ;
M: x86 %peek loc>operand MOV ;
M: x86 %replace loc>operand swap MOV ;
M: x86 %replace-imm
loc>operand swap
{
{ [ dup not ] [ drop \ f type-number MOV ] }
{ [ dup fixnum? ] [ tag-fixnum MOV ] }
[ [ HEX: ffffffff MOV ] dip rc-absolute rel-literal ]
} cond ;
: (%inc) ( n reg -- ) swap cells dup 0 > [ ADD ] [ neg SUB ] if ; inline
M: x86 %inc-d ( n -- ) ds-reg (%inc) ;
M: x86 %inc-r ( n -- ) rs-reg (%inc) ;
@ -109,12 +123,6 @@ M: x86 %jump-label ( label -- ) 0 JMP rc-relative label-fixup ;
M: x86 %return ( -- ) 0 RET ;
: code-alignment ( align -- n )
[ building get length dup ] dip align swap - ;
: align-code ( n -- )
0 <repetition> % ;
: (%slot) ( obj slot scale tag -- op ) neg <indirect> ; inline
: (%slot-imm) ( obj slot tag -- op ) slot-offset [+] ; inline
@ -388,8 +396,8 @@ M: x86.64 has-small-reg? 2drop t ;
{ c:uchar [ 8 %alien-unsigned-getter ] }
{ c:short [ 16 %alien-signed-getter ] }
{ c:ushort [ 16 %alien-unsigned-getter ] }
{ c:int [ 32 [ 2drop ] %alien-integer-getter ] }
{ c:uint [ 32 %alien-signed-getter ] }
{ c:int [ 32 %alien-signed-getter ] }
{ c:uint [ 32 [ 2drop ] %alien-integer-getter ] }
} case
] [ [ drop ] 2dip %copy ] ?if ;
@ -1448,7 +1456,7 @@ M: x86 %scalar>vector %copy ;
M:: x86 %spill ( src rep dst -- ) dst src rep %copy ;
M:: x86 %reload ( dst rep src -- ) dst src rep %copy ;
M: x86 %loop-entry 16 code-alignment [ NOP ] times ;
M: x86 %loop-entry 16 alignment [ NOP ] times ;
M:: x86 %restore-context ( temp1 temp2 -- )
#! Load Factor stack pointers on entry from C to Factor.

View File

@ -265,12 +265,6 @@ struct initial_code_block_visitor {
case RT_LITERAL:
op.store_value(next_literal());
break;
case RT_FLOAT:
op.store_float(next_literal());
break;
case RT_BYTE_ARRAY:
op.store_byte_array(next_literal());
break;
case RT_ENTRY_POINT:
op.store_value(parent->compute_entry_point_address(next_literal()));
break;

View File

@ -111,12 +111,6 @@ struct code_block_compaction_relocation_visitor {
case RT_LITERAL:
op.store_value(slot_forwarder.visit_pointer(op.load_value(old_offset)));
break;
case RT_FLOAT:
op.store_float(slot_forwarder.visit_pointer(op.load_float(old_offset)));
break;
case RT_BYTE_ARRAY:
op.store_byte_array(slot_forwarder.visit_pointer(op.load_byte_array(old_offset)));
break;
case RT_ENTRY_POINT:
case RT_ENTRY_POINT_PIC:
case RT_ENTRY_POINT_PIC_TAIL:

View File

@ -185,12 +185,6 @@ struct code_block_fixup_relocation_visitor {
case RT_LITERAL:
op.store_value(data_visitor.visit_pointer(op.load_value(old_offset)));
break;
case RT_FLOAT:
op.store_float(data_visitor.visit_pointer(op.load_float(old_offset)));
break;
case RT_BYTE_ARRAY:
op.store_byte_array(data_visitor.visit_pointer(op.load_byte_array(old_offset)));
break;
case RT_ENTRY_POINT:
case RT_ENTRY_POINT_PIC:
case RT_ENTRY_POINT_PIC_TAIL:

View File

@ -62,26 +62,6 @@ fixnum instruction_operand::load_value()
return load_value(pointer);
}
cell instruction_operand::load_float()
{
return (cell)load_value() - boxed_float_offset;
}
cell instruction_operand::load_float(cell pointer)
{
return (cell)load_value(pointer) - boxed_float_offset;
}
cell instruction_operand::load_byte_array()
{
return (cell)load_value() - byte_array_offset;
}
cell instruction_operand::load_byte_array(cell pointer)
{
return (cell)load_value(pointer) - byte_array_offset;
}
code_block *instruction_operand::load_code_block(cell relative_to)
{
return ((code_block *)load_value(relative_to) - 1);
@ -155,16 +135,6 @@ void instruction_operand::store_value(fixnum absolute_value)
}
}
void instruction_operand::store_float(cell value)
{
store_value((fixnum)value + boxed_float_offset);
}
void instruction_operand::store_byte_array(cell value)
{
store_value((fixnum)value + byte_array_offset);
}
void instruction_operand::store_code_block(code_block *compiled)
{
store_value((cell)compiled->entry_point());

View File

@ -30,10 +30,6 @@ enum relocation_type {
type since its used in a situation where relocation arguments cannot
be passed in, and so RT_DLSYM is inappropriate (Windows only) */
RT_EXCEPTION_HANDLER,
/* pointer to a float's payload */
RT_FLOAT,
/* pointer to a byte array's payload */
RT_BYTE_ARRAY,
};
@ -117,8 +113,6 @@ struct relocation_entry {
case RT_CARDS_OFFSET:
case RT_DECKS_OFFSET:
case RT_EXCEPTION_HANDLER:
case RT_FLOAT:
case RT_BYTE_ARRAY:
return 0;
default:
critical_error("Bad rel type in number_of_parameters()",rel_type());
@ -159,18 +153,12 @@ struct instruction_operand {
fixnum load_value_masked(cell mask, cell bits, cell shift);
fixnum load_value(cell relative_to);
fixnum load_value();
cell load_float(cell relative_to);
cell load_float();
cell load_byte_array(cell relative_to);
cell load_byte_array();
code_block *load_code_block(cell relative_to);
code_block *load_code_block();
void store_value_2_2(fixnum value);
void store_value_masked(fixnum value, cell mask, cell shift);
void store_value(fixnum value);
void store_float(cell value);
void store_byte_array(cell value);
void store_code_block(code_block *compiled);
};

View File

@ -178,8 +178,6 @@ struct bignum : public object {
cell *data() const { return (cell *)(this + 1); }
};
const cell byte_array_offset = 16 - BYTE_ARRAY_TYPE;
struct byte_array : public object {
static const cell type_number = BYTE_ARRAY_TYPE;
static const cell element_size = 1;
@ -244,8 +242,6 @@ struct wrapper : public object {
cell object;
};
const fixnum boxed_float_offset = 8 - FLOAT_TYPE;
/* Assembly code makes assumptions about the layout of this struct */
struct boxed_float : object {
static const cell type_number = FLOAT_TYPE;

View File

@ -192,20 +192,8 @@ struct literal_references_visitor {
void operator()(instruction_operand op)
{
switch(op.rel_type())
{
case RT_LITERAL:
if(op.rel_type() == RT_LITERAL)
op.store_value(visitor->visit_pointer(op.load_value()));
break;
case RT_FLOAT:
op.store_float(visitor->visit_pointer(op.load_float()));
break;
case RT_BYTE_ARRAY:
op.store_byte_array(visitor->visit_pointer(op.load_byte_array()));
break;
default:
break;
}
}
};