From 22fbe01c1750fcfe05c592025cb55d06d288c75e Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 3 Feb 2010 21:27:18 +1300 Subject: [PATCH] Compiled code size optimizations for x86-64 resulting in development image size reduction of about 350kb - RIP-relative LEA is now used to load program counter in prolog and tail calls; saves 3 bytes of code and 4 bytes of relocation information over an immediate MOV - VM field accesses no longer involve a redundant addition --- basis/cpu/architecture/architecture.factor | 1 + basis/cpu/ppc/ppc.factor | 26 +++++++++------------- basis/cpu/x86/32/32.factor | 12 +++++++--- basis/cpu/x86/32/bootstrap.factor | 5 +++++ basis/cpu/x86/64/64.factor | 14 ++++++++---- basis/cpu/x86/64/bootstrap.factor | 7 +++++- basis/cpu/x86/bootstrap.factor | 5 ----- basis/cpu/x86/x86.factor | 23 +++++++------------ vm/jit.hpp | 2 ++ 9 files changed, 51 insertions(+), 44 deletions(-) diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 03090dc4b5..734241a5d3 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -434,6 +434,7 @@ HOOK: %set-alien-double cpu ( ptr offset value -- ) HOOK: %set-alien-vector cpu ( ptr offset value rep -- ) HOOK: %alien-global cpu ( dst symbol library -- ) +HOOK: %vm-field cpu ( dst fieldname -- ) HOOK: %vm-field-ptr cpu ( dst fieldname -- ) HOOK: %allot cpu ( dst size class temp -- ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index a914b3551e..88e54d5a88 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -57,10 +57,11 @@ CONSTANT: vm-reg 15 : %load-vm-addr ( reg -- ) vm-reg MR ; -: %load-vm-field-addr ( reg symbol -- ) - [ vm-reg ] dip vm-field-offset ADDI ; +M: ppc %vm-field ( dst field -- ) + [ vm-reg ] dip vm-field-offset LWZ ; -M: ppc %vm-field-ptr ( dst field -- ) %load-vm-field-addr ; +M: ppc %vm-field-ptr ( dst field -- ) + [ vm-reg ] dip vm-field-offset ADDI ; GENERIC: loc-reg ( loc -- reg ) @@ -601,26 +602,19 @@ M: ppc %push-stack ( -- ) ds-reg ds-reg 4 ADDI int-regs return-reg ds-reg 0 STW ; -:: %load-context-datastack ( dst -- ) - ! Load context struct - dst "ctx" %vm-field-ptr - dst dst 0 LWZ - ! Load context datastack pointer - dst dst "datastack" context-field-offset ADDI ; - M: ppc %push-context-stack ( -- ) - 11 %load-context-datastack - 12 11 0 LWZ + 11 "ctx" %vm-field + 12 11 "datastack" context-field-offset LWZ 12 12 4 ADDI - 12 11 0 STW + 12 11 "datastack" context-field-offset STW int-regs return-reg 12 0 STW ; M: ppc %pop-context-stack ( -- ) - 11 %load-context-datastack - 12 11 0 LWZ + 11 "ctx" %vm-field + 12 11 "datastack" context-field-offset LWZ int-regs return-reg 12 0 LWZ 12 12 4 SUBI - 12 11 0 STW ; + 12 11 "datastack" context-field-offset STW ; M: ppc %unbox ( n rep func -- ) ! Value must be in r3 diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor index 46216be6ea..b8b621ee11 100755 --- a/basis/cpu/x86/32/32.factor +++ b/basis/cpu/x86/32/32.factor @@ -27,6 +27,9 @@ M: x86.32 temp-reg ECX ; M: x86.32 %mov-vm-ptr ( reg -- ) 0 MOV 0 rc-absolute-cell rel-vm ; +M: x86.32 %vm-field ( dst field -- ) + [ 0 [] MOV ] dip vm-field-offset rc-absolute-cell rel-vm ; + M: x86.32 %vm-field-ptr ( dst field -- ) [ 0 MOV ] dip vm-field-offset rc-absolute-cell rel-vm ; @@ -102,6 +105,9 @@ M: x86.32 %prologue ( n -- ) 0 PUSH rc-absolute-cell rel-this 3 cells - decr-stack-reg ; +M: x86.32 %prepare-jump + pic-tail-reg 0 MOV xt-tail-pic-offset rc-absolute-cell rel-here ; + M: x86.32 %load-param-reg stack-params assert= [ [ EAX ] dip local@ MOV ] dip @@ -160,10 +166,10 @@ M: x86.32 %pop-stack ( n -- ) EAX swap ds-reg reg-stack MOV ; M: x86.32 %pop-context-stack ( -- ) - temp-reg %load-context-datastack - EAX temp-reg [] MOV + temp-reg "ctx" %vm-field + EAX temp-reg "datastack" context-field-offset [+] MOV EAX EAX [] MOV - temp-reg [] bootstrap-cell SUB ; + temp-reg "datastack" context-field-offset [+] bootstrap-cell SUB ; : call-unbox-func ( func -- ) 4 save-vm-ptr diff --git a/basis/cpu/x86/32/bootstrap.factor b/basis/cpu/x86/32/bootstrap.factor index d11aa952d9..cf2d09501c 100644 --- a/basis/cpu/x86/32/bootstrap.factor +++ b/basis/cpu/x86/32/bootstrap.factor @@ -36,6 +36,11 @@ IN: bootstrap.x86 ESP stack-frame-size 3 bootstrap-cells - SUB ] jit-prolog jit-define +[ + temp3 0 MOV rc-absolute-cell rt-here jit-rel + 0 JMP rc-relative rt-entry-point-pic-tail jit-rel +] jit-word-jump jit-define + : jit-load-vm ( -- ) vm-reg 0 MOV 0 rc-absolute-cell jit-vm ; diff --git a/basis/cpu/x86/64/64.factor b/basis/cpu/x86/64/64.factor index 5213030bdf..856127aedf 100644 --- a/basis/cpu/x86/64/64.factor +++ b/basis/cpu/x86/64/64.factor @@ -42,17 +42,23 @@ M: x86.64 machine-registers M: x86.64 %mov-vm-ptr ( reg -- ) vm-reg MOV ; +M: x86.64 %vm-field ( dst field -- ) + [ vm-reg ] dip vm-field-offset [+] MOV ; + M: x86.64 %vm-field-ptr ( dst field -- ) [ vm-reg ] dip vm-field-offset [+] LEA ; : param@ ( n -- op ) reserved-stack-space + stack@ ; M: x86.64 %prologue ( n -- ) - temp-reg 0 MOV rc-absolute-cell rel-this + temp-reg -7 [] LEA dup PUSH temp-reg PUSH stack-reg swap 3 cells - SUB ; +M: x86.64 %prepare-jump + pic-tail-reg xt-tail-pic-offset [] LEA ; + : load-cards-offset ( dst -- ) 0 MOV rc-absolute-cell rel-cards-offset ; @@ -104,10 +110,10 @@ M: x86.64 %pop-stack ( n -- ) param-reg-0 swap ds-reg reg-stack MOV ; M: x86.64 %pop-context-stack ( -- ) - temp-reg %load-context-datastack - param-reg-0 temp-reg [] MOV + temp-reg "ctx" %vm-field + param-reg-0 temp-reg "datastack" context-field-offset [+] MOV param-reg-0 param-reg-0 [] MOV - temp-reg [] bootstrap-cell SUB ; + temp-reg "datastack" context-field-offset [+] bootstrap-cell SUB ; M:: x86.64 %unbox ( n rep func -- ) param-reg-1 %mov-vm-ptr diff --git a/basis/cpu/x86/64/bootstrap.factor b/basis/cpu/x86/64/bootstrap.factor index 828598074f..48b5dfd65d 100644 --- a/basis/cpu/x86/64/bootstrap.factor +++ b/basis/cpu/x86/64/bootstrap.factor @@ -28,7 +28,7 @@ IN: bootstrap.x86 [ ! load entry point - safe-reg 0 MOV rc-absolute-cell rt-this jit-rel + safe-reg -7 [] LEA ! save stack frame size stack-frame-size PUSH ! push entry point @@ -37,6 +37,11 @@ IN: bootstrap.x86 RSP stack-frame-size 3 bootstrap-cells - SUB ] jit-prolog jit-define +[ + temp3 5 [] LEA + 0 JMP rc-relative rt-entry-point-pic-tail jit-rel +] jit-word-jump jit-define + : jit-load-context ( -- ) ctx-reg vm-reg vm-context-offset [+] MOV ; diff --git a/basis/cpu/x86/bootstrap.factor b/basis/cpu/x86/bootstrap.factor index 2304f9c9f3..19ac0189a3 100644 --- a/basis/cpu/x86/bootstrap.factor +++ b/basis/cpu/x86/bootstrap.factor @@ -76,11 +76,6 @@ big-endian off ds-reg [] temp0 MOV ] jit-push jit-define -[ - temp3 0 MOV rc-absolute-cell rt-here jit-rel - 0 JMP rc-relative rt-entry-point-pic-tail jit-rel -] jit-word-jump jit-define - [ 0 CALL rc-relative rt-entry-point-pic jit-rel ] jit-word-call jit-define diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 0cd557896b..e54e307f79 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -88,8 +88,10 @@ M: x86 %call ( word -- ) 0 CALL rc-relative rel-word-pic ; #! See the comment in vm/cpu-x86.hpp 4 1 + ; inline +HOOK: %prepare-jump cpu ( -- ) + M: x86 %jump ( word -- ) - pic-tail-reg 0 MOV xt-tail-pic-offset rc-absolute-cell rel-here + %prepare-jump 0 JMP rc-relative rel-word-pic-tail ; M: x86 %jump-label ( label -- ) 0 JMP rc-relative label-fixup ; @@ -474,17 +476,10 @@ M: x86 %push-stack ( -- ) ds-reg cell ADD ds-reg [] int-regs return-reg MOV ; -:: %load-context-datastack ( dst -- ) - ! Load context struct - dst "ctx" %vm-field-ptr - dst dst [] MOV - ! Load context datastack pointer - dst "datastack" context-field-offset ADD ; - M: x86 %push-context-stack ( -- ) - temp-reg %load-context-datastack - temp-reg [] bootstrap-cell ADD - temp-reg temp-reg [] MOV + temp-reg "ctx" %vm-field + temp-reg "datastack" context-field-offset [+] bootstrap-cell ADD + temp-reg temp-reg "datastack" context-field-offset [+] MOV temp-reg [] int-regs return-reg MOV ; M: x86 %epilogue ( n -- ) cell - incr-stack-reg ; @@ -1409,8 +1404,7 @@ M: x86 %loop-entry 16 code-alignment [ NOP ] times ; M:: x86 %restore-context ( temp1 temp2 -- ) #! Load Factor stack pointers on entry from C to Factor. #! Also save callstack bottom! - temp1 "ctx" %vm-field-ptr - temp1 temp1 [] MOV + temp1 "ctx" %vm-field temp2 stack-reg stack-frame get total-size>> cell - [+] LEA temp1 "callstack-bottom" context-field-offset [+] temp2 MOV ds-reg temp1 "datastack" context-field-offset [+] MOV @@ -1420,8 +1414,7 @@ M:: x86 %save-context ( temp1 temp2 -- ) #! Save Factor stack pointers in case the C code calls a #! callback which does a GC, which must reliably trace #! all roots. - temp1 "ctx" %vm-field-ptr - temp1 temp1 [] MOV + temp1 "ctx" %vm-field temp2 stack-reg cell neg [+] LEA temp1 "callstack-top" context-field-offset [+] temp2 MOV temp1 "datastack" context-field-offset [+] ds-reg MOV diff --git a/vm/jit.hpp b/vm/jit.hpp index 277aecb66d..3889d92fc5 100644 --- a/vm/jit.hpp +++ b/vm/jit.hpp @@ -33,7 +33,9 @@ struct jit { void word_jump(cell word_) { data_root word(word_,parent); +#ifndef FACTOR_AMD64 literal(tag_fixnum(xt_tail_pic_offset)); +#endif literal(word.value()); emit(parent->special_objects[JIT_WORD_JUMP]); }