From fa79feb68ac1b75192f272acbf716e8236254756 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 8 Sep 2004 06:31:03 +0000 Subject: [PATCH] compiler compiles fib --- library/compiler/assembly-x86.factor | 85 +++++++++++----- library/compiler/compiler.factor | 112 ++++++++++++++------- library/image.factor | 40 ++++++-- library/platform/native/boot-stage2.factor | 1 + library/test/x86-compiler/asm-test.factor | 27 +++++ library/test/x86-compiler/compiler.factor | 77 ++++++++++++++ native/error.c | 2 - native/gc.c | 9 +- native/relocate.c | 18 ++-- native/types.c | 17 +++- native/types.h | 2 +- native/word.c | 2 +- 12 files changed, 298 insertions(+), 94 deletions(-) create mode 100644 library/test/x86-compiler/asm-test.factor create mode 100644 library/test/x86-compiler/compiler.factor diff --git a/library/compiler/assembly-x86.factor b/library/compiler/assembly-x86.factor index b2e0ca2ef3..34cdcf145f 100644 --- a/library/compiler/assembly-x86.factor +++ b/library/compiler/assembly-x86.factor @@ -49,19 +49,14 @@ USE: combinators : I>R ( imm reg -- ) #! MOV TO - dup EAX = [ - drop HEX: b8 compile-byte - ] [ - HEX: 8b compile-byte - 3 shift BIN: 101 bitor compile-byte - ] ifte compile-cell ; + HEX: b8 + compile-byte compile-cell ; : [I]>R ( imm reg -- ) #! MOV INDIRECT TO dup EAX = [ drop HEX: a1 compile-byte ] [ - HEX: 8d compile-byte + HEX: 8b compile-byte 3 shift BIN: 101 bitor compile-byte ] ifte compile-cell ; @@ -71,8 +66,13 @@ USE: combinators : R>[I] ( reg imm -- ) #! MOV INDIRECT TO . - #! Actually only works with EAX (?) - swap HEX: a3 + compile-byte compile-cell ; + #! Actually only works with EAX. + over EAX = [ + nip HEX: a3 compile-byte + ] [ + HEX: 89 compile-byte + swap 3 shift BIN: 101 bitor compile-byte + ] ifte compile-cell ; : [R]>R ( reg reg -- ) #! MOV INDIRECT TO . @@ -89,6 +89,36 @@ USE: combinators compile-cell compile-cell ; +: R-I ( imm reg -- ) + #! SUBTRACT FROM , STORE RESULT IN + over -128 127 between? [ + HEX: 83 compile-byte + HEX: e8 + compile-byte + compile-byte + ] [ + dup EAX = [ + drop HEX: 2d compile-byte + ] [ + HEX: 81 compile-byte + BIN: 11101000 bitor + ] ifte + compile-cell + ] ifte ; + +: CMP-I-[R] ( imm reg -- ) + #! There are two forms of CMP we assemble + #! 83 38 03 cmpl $0x3,(%eax) + #! 81 38 33 33 33 00 cmpl $0x333333,(%eax) + over -128 127 between? [ + HEX: 83 compile-byte + HEX: 38 + compile-byte + compile-byte + ] [ + HEX: 81 compile-byte + HEX: 38 + compile-byte + compile-cell + ] ifte ; + : LITERAL ( cell -- ) #! Push literal on data stack. #! Assume that it is ok to clobber EAX without saving. @@ -100,33 +130,36 @@ USE: combinators #! Push literal on data stack by following an indirect #! pointer. ECX PUSH - ( cell -- ) ECX I>R - ECX ECX [R]>R + ( cell -- ) ECX [I]>R DATASTACK EAX [I]>R ECX EAX R>[R] 4 DATASTACK I+[I] ECX POP ; : POP-DS ( -- ) - #! Pop datastack into EAX. - ( ECX PUSH ) - DATASTACK ECX I>R - ! LEA... - HEX: 8d compile-byte HEX: 41 compile-byte HEX: fc compile-byte - EAX DATASTACK R>[I] - EAX EAX [R]>R - ( ECX POP ) ; + #! Pop datastack, store pointer to datastack top in EAX. + DATASTACK EAX [I]>R + 4 EAX R-I + EAX DATASTACK R>[I] ; -: (JUMP) ( xt opcode -- ) - #! JMP, CALL insn is 5 bytes long +: fixup ( addr where -- ) + #! Encode a relative offset to addr from where at where. + #! Add 4 because addr is relative to *after* insn. + dup >r 4 + - r> set-compiled-cell ; + +: (JUMP) ( xt -- fixup ) #! addr is relative to *after* insn - compile-byte compiled-offset 4 + - compile-cell ; + compiled-offset dup >r 4 + - compile-cell r> ; -: JUMP ( -- ) - HEX: e9 (JUMP) ; +: JUMP ( xt -- fixup ) + #! Push address of branch for fixup + HEX: e9 compile-byte (JUMP) ; -: CALL ( -- ) - HEX: e8 (JUMP) ; +: CALL ( xt -- fixup ) + HEX: e8 compile-byte (JUMP) ; + +: JE ( xt -- fixup ) + HEX: 0f compile-byte HEX: 84 compile-byte (JUMP) ; : RET ( -- ) HEX: c3 compile-byte ; diff --git a/library/compiler/compiler.factor b/library/compiler/compiler.factor index 8bab12a5bc..2103b06d38 100644 --- a/library/compiler/compiler.factor +++ b/library/compiler/compiler.factor @@ -26,42 +26,64 @@ ! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. IN: compiler -USE: math -USE: stack -USE: lists USE: combinators -USE: words -USE: namespaces -USE: unparser USE: errors -USE: strings -USE: logic USE: kernel +USE: lists +USE: logic +USE: math +USE: namespaces +USE: parser +USE: stack +USE: strings +USE: unparser USE: vectors +USE: words : pop-literal ( -- obj ) "compile-datastack" get vector-pop ; +: immediate? ( obj -- ? ) + #! fixnums and f have a pointerless representation, and + #! are compiled immediately. Everything else can be moved + #! by GC, and is indexed through a table. + dup fixnum? swap f eq? or ; + : compile-literal ( obj -- ) - dup fixnum? [ + dup immediate? [ address-of LITERAL ] [ intern-literal [LITERAL] ] ifte ; : commit-literals ( -- ) - "compile-datastack" get dup [ compile-literal ] vector-each - 0 swap set-vector-length ; + "compile-datastack" get + dup vector-empty? [ + drop + ] [ + dup [ compile-literal ] vector-each + 0 swap set-vector-length + ] ifte ; : postpone ( obj -- ) #! Literals are not compiled immediately, so that words like #! ifte with special compilation behavior can work. "compile-datastack" get vector-push ; +: tail? ( -- ? ) + "compile-callstack" get vector-empty? ; + +: compiled-xt ( word -- xt ) + "compiled-xt" over word-property dup [ + nip + ] [ + drop word-xt + ] ifte ; + : compile-simple-word ( word -- ) #! Compile a JMP at the end (tail call optimization) - commit-literals word-xt - "compile-last" get [ JUMP ] [ CALL ] ifte ; + commit-literals compiled-xt + tail? [ JUMP ] [ CALL ] ifte drop ; : compile-word ( word -- ) #! If a word has a compiling property, then it has special @@ -72,32 +94,54 @@ USE: vectors drop compile-simple-word ] ifte ; -: compile-atom ( obj -- ) +: begin-compiling-quot ( quot -- ) + "compile-callstack" get vector-push ; + +: end-compiling-quot ( -- ) + "compile-callstack" get vector-pop drop ; + +: compiling ( quot -- ) + #! Called on each iteration of compile-loop, with the + #! remaining quotation. [ - [ word? ] [ compile-word ] - [ drop t ] [ postpone ] - ] cond ; + "compile-callstack" get + dup vector-length pred + swap set-vector-nth + ] [ + end-compiling-quot + ] ifte* ; + +: compile-atom ( obj -- ) + dup word? [ compile-word ] [ postpone ] ifte ; : compile-loop ( quot -- ) - dup [ - unswons - over not "compile-last" set - compile-atom - compile-loop - ] [ - commit-literals drop RET - ] ifte ; - -: compile-quot ( quot -- xt ) [ - "compile-last" off + uncons dup compiling swap compile-atom compile-loop + ] when* ; + +: compile-quot ( quot -- ) + [ + dup begin-compiling-quot compile-loop commit-literals + ] when* ; + +: with-compiler ( quot -- ) + [ 10 "compile-datastack" set - compiled-offset swap compile-loop + 10 "compile-callstack" set + call ] with-scope ; -: compile ( word -- ) - intern dup word-parameter compile-quot swap set-word-xt ; +: begin-compiling ( word -- ) + compiled-offset "compiled-xt" rot set-word-property ; -: call-xt ( xt -- ) - #! For testing. - 0 f f [ set-word-xt ] keep execute ; +: end-compiling ( word -- xt ) + "compiled-xt" over word-property over set-word-xt + f "compiled-xt" rot set-word-property ; + +: compile ( word -- ) + intern dup + begin-compiling + dup word-parameter [ compile-quot RET ] with-compiler + end-compiling ; + +: compiled word compile ; parsing diff --git a/library/image.factor b/library/image.factor index 5a478d035a..531ac6a2b7 100644 --- a/library/image.factor +++ b/library/image.factor @@ -88,10 +88,17 @@ USE: words ( Image header ) +: base + #! We relocate the image to after the header, and leaving + #! two empty cells. This lets us differentiate an F pointer + #! (0/tag 3) from a pointer to the first object in the + #! image. + 2 cell * ; + : header ( -- ) image-magic emit image-version emit - ( relocation base at end of header ) 0 emit + ( relocation base at end of header ) base emit ( bootstrap quotation set later ) 0 emit ( global namespace set later ) 0 emit ( size of heap set later ) 0 emit ; @@ -101,11 +108,16 @@ USE: words : heap-size-offset 5 ; : header-size 6 ; -( Top of heap pointer ) +( Allocator ) -: here ( -- size ) image vector-length header-size - cell * ; -: here-as ( tag -- pointer ) here swap bitor ; -: pad ( -- ) here 8 mod 4 = [ 0 emit ] when ; +: here ( -- size ) + image vector-length header-size - cell * base + ; + +: here-as ( tag -- pointer ) + here swap bitor ; + +: pad ( -- ) + here 8 mod 4 = [ 0 emit ] when ; ( Remember what objects we've compiled ) @@ -135,18 +147,20 @@ USE: words ! Padded with fixnums for 8-byte alignment -: f, object-tag here-as "f" set f-type >header emit 0 'fixnum emit ; -: t, object-tag here-as "t" set t-type >header emit 0 'fixnum emit ; +: t, + object-tag here-as "t" set + t-type >header emit + 0 'fixnum emit ; : 0, 0 'bignum drop ; : 1, 1 'bignum drop ; : -1, -1 'bignum drop ; ( Beginning of the image ) -! The image proper begins with the header, then F, T, +! The image proper begins with the header, then T, ! and the bignums 0, 1, and -1. -: begin ( -- ) header f, t, 0, 1, -1, ; +: begin ( -- ) header t, 0, 1, -1, ; ( Words ) @@ -315,7 +329,8 @@ IN: cross-compiler [ string? ] [ 'string ] [ vector? ] [ 'vector ] [ t = ] [ drop "t" get ] - [ f = ] [ drop "f" get ] + ! f is #define F RETAG(0,OBJECT_TYPE) + [ f = ] [ drop object-tag ] [ drop t ] [ "Cannot cross-compile: " swap cat2 throw ] ] cond ; @@ -329,7 +344,10 @@ IN: cross-compiler namespace-buckets dup >r set-hash r> (set-global) ; -: end ( -- ) global, fixup-words here heap-size-offset fixup ; +: end ( -- ) + global, + fixup-words + here base - heap-size-offset fixup ; ( Image output ) diff --git a/library/platform/native/boot-stage2.factor b/library/platform/native/boot-stage2.factor index 201b516025..b0027006d6 100644 --- a/library/platform/native/boot-stage2.factor +++ b/library/platform/native/boot-stage2.factor @@ -134,6 +134,7 @@ USE: stdio "/library/compiler/assembler.factor" "/library/compiler/assembly-x86.factor" "/library/compiler/compiler.factor" + "/library/compiler/words.factor" "/library/platform/native/primitives.factor" diff --git a/library/test/x86-compiler/asm-test.factor b/library/test/x86-compiler/asm-test.factor new file mode 100644 index 0000000000..65680886e2 --- /dev/null +++ b/library/test/x86-compiler/asm-test.factor @@ -0,0 +1,27 @@ +IN: scratchpad +USE: compiler + +0 EAX I>R +0 ECX I>R + +0 EAX [I]>R +0 ECX [I]>R + +0 EAX I>[R] +0 ECX I>[R] + +EAX 0 R>[I] +ECX 0 R>[I] + +EAX EAX [R]>R +EAX ECX [R]>R +ECX EAX [R]>R +ECX ECX [R]>R + +EAX EAX R>[R] +EAX ECX R>[R] +ECX EAX R>[R] +ECX ECX R>[R] + +4 0 I+[I] +0 4 I+[I] diff --git a/library/test/x86-compiler/compiler.factor b/library/test/x86-compiler/compiler.factor new file mode 100644 index 0000000000..7d1988f608 --- /dev/null +++ b/library/test/x86-compiler/compiler.factor @@ -0,0 +1,77 @@ +IN: scratchpad +USE: compiler +USE: test +USE: math +USE: stack +USE: kernel +USE: combinators +USE: words + +: no-op ; compiled + +[ ] [ no-op ] unit-test + +: literals 3 5 ; compiled + +[ 3 5 ] [ literals ] unit-test + +: literals&tail-call 3 5 + ; compiled + +[ 8 ] [ literals&tail-call ] unit-test + +: two-calls dup * ; compiled + +[ 25 ] [ 5 two-calls ] unit-test + +: mix-test 3 5 + 6 * ; compiled + +[ 48 ] [ mix-test ] unit-test + +: indexed-literal-test "hello world" ; compiled + +garbage-collection +garbage-collection + +[ "hello world" ] [ indexed-literal-test ] unit-test + +: dummy-ifte-1 t [ ] [ ] ifte ; compiled + +[ ] [ dummy-ifte-1 ] unit-test + +: dummy-ifte-2 f [ ] [ ] ifte ; compiled + +[ ] [ dummy-ifte-2 ] unit-test + +: dummy-ifte-3 t [ 1 ] [ 2 ] ifte ; compiled + +[ 1 ] [ dummy-ifte-3 ] unit-test + +: dummy-ifte-4 f [ 1 ] [ 2 ] ifte ; compiled + +[ 2 ] [ dummy-ifte-4 ] unit-test + +: dummy-ifte-5 0 dup 1 <= [ drop 1 ] [ ] ifte ; compiled + +[ 1 ] [ dummy-ifte-5 ] unit-test + +: dummy-ifte-6 + dup 1 <= [ + drop 1 + ] [ + 1 - dup swap 1 - + + ] ifte ; + +[ 17 ] [ 10 dummy-ifte-6 ] unit-test + +: dead-code-rec + t [ + #{ 3 2 } + ] [ + dead-code-rec + ] ifte ; compiled + +[ #{ 3 2 } ] [ dead-code-rec ] unit-test + +: one-rec [ f one-rec ] [ "hi" ] ifte ; compiled + +[ "hi" ] [ t one-rec ] unit-test diff --git a/native/error.c b/native/error.c index 29ce6771d6..f0a77ee833 100644 --- a/native/error.c +++ b/native/error.c @@ -15,8 +15,6 @@ void critical_error(char* msg, CELL tagged) void fix_stacks(void) { - fprintf(stderr,"%x\n",ds); - fprintf(stderr,"%x\n",ds_bot); if(STACK_UNDERFLOW(ds,ds_bot) || STACK_OVERFLOW(ds,ds_bot)) reset_datastack(); diff --git a/native/gc.c b/native/gc.c index 7401d72b15..0af8d727b2 100644 --- a/native/gc.c +++ b/native/gc.c @@ -31,12 +31,8 @@ void copy_object(CELL* handle) CELL tag = TAG(pointer); CELL header, newpointer; - if(tag == FIXNUM_TYPE) - { - /* convinience */ - gc_debug("FIXNUM",pointer); + if(tag == FIXNUM_TYPE || pointer == F) return; - } if(in_zone(&active,pointer)) critical_error("copy_object given newspace ptr",pointer); @@ -118,8 +114,7 @@ void collect_roots(void) CELL ptr; gc_debug("collect_roots",scan); - /* these two must be the first in the heap */ - copy_object(&F); + /*T must be the first in the heap */ copy_object(&T); /* the bignum 0 1 -1 constants must be the next three */ copy_bignum_constants(); diff --git a/native/relocate.c b/native/relocate.c index 16e1eda1da..ff5a8ede79 100644 --- a/native/relocate.c +++ b/native/relocate.c @@ -2,14 +2,12 @@ void fixup(CELL* cell) { - if(TAG(*cell) != FIXNUM_TYPE) + if(TAG(*cell) != FIXNUM_TYPE && *cell != F) *cell += (active.base - relocation_base); } void relocate_object() { - CELL size; - size = untagged_object_size(relocating); switch(untag_header(get(relocating))) { case WORD_TYPE: @@ -32,20 +30,27 @@ void relocate_object() break; } - relocating += size; } void relocate_next() { + CELL size = CELLS; + switch(TAG(get(relocating))) { case HEADER_TYPE: + size = untagged_object_size(relocating); relocate_object(); break; + case OBJECT_TYPE: + if(get(relocating) == F) + break; + /* fall thru */ default: fixup((CELL*)relocating); - relocating += CELLS; + break; } + relocating += size; } void init_object(CELL* handle, CELL type) @@ -65,8 +70,7 @@ void relocate(CELL r) relocating = active.base; - /* The first two objects in the image must always be F, T */ - init_object(&F,F_TYPE); + /* The first object in the image must always T */ init_object(&T,T_TYPE); /* The next three must be bignum 0, 1, -1 */ diff --git a/native/types.c b/native/types.c index ea268728bc..7c6cff7559 100644 --- a/native/types.c +++ b/native/types.c @@ -3,10 +3,15 @@ CELL type_of(CELL tagged) { CELL tag = TAG(tagged); - if(tag != OBJECT_TYPE) - return tag; + if(tag == OBJECT_TYPE) + { + if(tagged == F) + return F_TYPE; + else + return untag_header(get(UNTAG(tagged))); + } else - return untag_header(get(UNTAG(tagged))); + return tag; } bool typep(CELL type, CELL tagged) @@ -67,13 +72,15 @@ CELL object_size(CELL pointer) CELL untagged_object_size(CELL pointer) { CELL size; - + + if(pointer == F) + return 0; + switch(untag_header(get(pointer))) { case WORD_TYPE: size = sizeof(WORD); break; - case F_TYPE: case T_TYPE: size = CELLS * 2; break; diff --git a/native/types.h b/native/types.h index e38d1f5861..b88203d235 100644 --- a/native/types.h +++ b/native/types.h @@ -18,7 +18,7 @@ /* Canonical F object */ #define F_TYPE 6 -CELL F; +#define F RETAG(0,OBJECT_TYPE) /* Canonical T object */ #define T_TYPE 7 diff --git a/native/word.c b/native/word.c index 37719c9b55..e1fc208977 100644 --- a/native/word.c +++ b/native/word.c @@ -111,7 +111,7 @@ void primitive_set_word_allot_count(void) void fixup_word(WORD* word) { - word->xt = primitive_to_xt(word->primitive); + update_xt(word); fixup(&word->parameter); fixup(&word->plist); }