Merge branch 'master' of factorcode.org:/git/factor
commit
1e7cc1613e
|
|
@ -1,8 +1,7 @@
|
|||
! (c)2009, 2010 Slava Pestov, Joe Groff bsd license
|
||||
USING: accessors alien alien.c-types alien.arrays alien.strings arrays
|
||||
byte-arrays cpu.architecture fry io io.encodings.binary
|
||||
io.files io.streams.memory kernel libc math sequences words
|
||||
byte-vectors ;
|
||||
USING: accessors alien alien.c-types alien.arrays alien.strings
|
||||
arrays byte-arrays cpu.architecture fry io io.encodings.binary
|
||||
io.files io.streams.memory kernel libc math sequences words ;
|
||||
IN: alien.data
|
||||
|
||||
GENERIC: require-c-array ( c-type -- )
|
||||
|
|
@ -63,13 +62,6 @@ M: memory-stream stream-read
|
|||
swap memory>byte-array
|
||||
] [ [ + ] change-index drop ] 2bi ;
|
||||
|
||||
M: byte-vector stream-write
|
||||
[ dup byte-length tail-slice ]
|
||||
[ [ [ byte-length ] bi@ + ] keep lengthen ]
|
||||
[ drop byte-length ]
|
||||
2tri
|
||||
[ >c-ptr swap >c-ptr ] dip memcpy ;
|
||||
|
||||
M: value-type c-type-rep drop int-rep ;
|
||||
|
||||
M: value-type c-type-getter
|
||||
|
|
@ -83,4 +75,3 @@ M: array c-type-boxer-quot
|
|||
unclip [ array-length ] dip [ <c-direct-array> ] 2curry ;
|
||||
|
||||
M: array c-type-unboxer-quot drop [ >c-ptr ] ;
|
||||
|
||||
|
|
|
|||
|
|
@ -13,9 +13,9 @@ TUPLE: biassoc from to ;
|
|||
|
||||
M: biassoc assoc-size from>> assoc-size ;
|
||||
|
||||
M: biassoc at* from>> at* ;
|
||||
M: biassoc at* from>> at* ; inline
|
||||
|
||||
M: biassoc value-at* to>> at* ;
|
||||
M: biassoc value-at* to>> at* ; inline
|
||||
|
||||
: once-at ( value key assoc -- )
|
||||
2dup key? [ 3drop ] [ set-at ] if ;
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
! Copyright (C) 2008, 2010 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays combinators hints kernel locals math
|
||||
math.order sequences ;
|
||||
math.order sequences sequences.private ;
|
||||
IN: binary-search
|
||||
|
||||
<PRIVATE
|
||||
|
||||
:: (search) ( seq from to quot: ( elt -- <=> ) -- i elt )
|
||||
from to + 2/ :> midpoint@
|
||||
midpoint@ seq nth :> midpoint
|
||||
midpoint@ seq nth-unsafe :> midpoint
|
||||
|
||||
to from - 1 <= [
|
||||
midpoint@ midpoint
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ gc
|
|||
[ optimized? not ] filter compile ;
|
||||
|
||||
"debug-compiler" get [
|
||||
|
||||
|
||||
nl
|
||||
"Compiling..." write flush
|
||||
|
||||
|
|
@ -57,7 +57,7 @@ gc
|
|||
|
||||
curry compose uncurry
|
||||
|
||||
array-nth set-array-nth length>>
|
||||
array-nth set-array-nth
|
||||
|
||||
wrap probe
|
||||
|
||||
|
|
@ -117,4 +117,6 @@ gc
|
|||
|
||||
" done" print flush
|
||||
|
||||
"io.streams.byte-array.fast" require
|
||||
|
||||
] unless
|
||||
|
|
|
|||
|
|
@ -287,7 +287,7 @@ M: ##copy analyze-aliases*
|
|||
M: ##compare analyze-aliases*
|
||||
call-next-method
|
||||
dup useless-compare? [
|
||||
dst>> \ f type-number \ ##load-immediate new-insn
|
||||
dst>> f \ ##load-constant new-insn
|
||||
analyze-aliases*
|
||||
] when ;
|
||||
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ M: #recursive emit-node
|
|||
and ;
|
||||
|
||||
: emit-trivial-if ( -- )
|
||||
ds-pop \ f type-number cc/= ^^compare-imm ds-push ;
|
||||
ds-pop f cc/= ^^compare-imm ds-push ;
|
||||
|
||||
: trivial-not-if? ( #if -- ? )
|
||||
children>> first2
|
||||
|
|
@ -132,12 +132,12 @@ M: #recursive emit-node
|
|||
and ;
|
||||
|
||||
: emit-trivial-not-if ( -- )
|
||||
ds-pop \ f type-number cc= ^^compare-imm ds-push ;
|
||||
ds-pop f cc= ^^compare-imm ds-push ;
|
||||
|
||||
: emit-actual-if ( #if -- )
|
||||
! Inputs to the final instruction need to be copied because of
|
||||
! loc>vreg sync
|
||||
ds-pop any-rep ^^copy \ f type-number cc/= ##compare-imm-branch emit-if ;
|
||||
ds-pop any-rep ^^copy f cc/= ##compare-imm-branch emit-if ;
|
||||
|
||||
M: #if emit-node
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
! Copyright (C) 2008, 2009 Slava Pestov.
|
||||
! Copyright (C) 2008, 2010 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays byte-arrays kernel layouts math
|
||||
namespaces sequences combinators splitting parser effects
|
||||
words cpu.architecture compiler.cfg.registers
|
||||
USING: accessors arrays byte-arrays combinators.short-circuit
|
||||
kernel layouts math namespaces sequences combinators splitting
|
||||
parser effects words cpu.architecture compiler.cfg.registers
|
||||
compiler.cfg.instructions compiler.cfg.instructions.syntax ;
|
||||
IN: compiler.cfg.hats
|
||||
|
||||
|
|
@ -41,11 +41,13 @@ insn-classes get [
|
|||
|
||||
>>
|
||||
|
||||
: immutable? ( obj -- ? )
|
||||
{ [ float? ] [ word? ] [ not ] } 1|| ; inline
|
||||
|
||||
: ^^load-literal ( obj -- dst )
|
||||
[ next-vreg dup ] dip {
|
||||
{ [ dup not ] [ drop \ f type-number ##load-immediate ] }
|
||||
{ [ dup fixnum? ] [ tag-fixnum ##load-immediate ] }
|
||||
{ [ dup float? ] [ ##load-constant ] }
|
||||
{ [ dup immutable? ] [ ##load-constant ] }
|
||||
[ ##load-reference ]
|
||||
} cond ;
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,10 @@ INSN: ##load-constant
|
|||
def: dst/int-rep
|
||||
constant: obj ;
|
||||
|
||||
INSN: ##load-double
|
||||
def: dst/double-rep
|
||||
constant: val ;
|
||||
|
||||
INSN: ##peek
|
||||
def: dst/int-rep
|
||||
literal: loc ;
|
||||
|
|
|
|||
|
|
@ -20,9 +20,6 @@ IN: compiler.cfg.intrinsics.fixnum
|
|||
0 cc= ^^compare-imm
|
||||
ds-push ;
|
||||
|
||||
: tag-literal ( n -- tagged )
|
||||
literal>> [ tag-fixnum ] [ \ f type-number ] if* ;
|
||||
|
||||
: emit-fixnum-op ( insn -- )
|
||||
[ 2inputs ] dip call ds-push ; inline
|
||||
|
||||
|
|
@ -44,7 +41,7 @@ IN: compiler.cfg.intrinsics.fixnum
|
|||
{ [ dup 0 [-inf,a] interval-subset? ] [ drop emit-fixnum-right-shift ] }
|
||||
[ drop emit-fixnum-shift-general ]
|
||||
} cond ;
|
||||
|
||||
|
||||
: emit-fixnum-bitnot ( -- )
|
||||
ds-pop ^^not tag-mask get ^^xor-imm ds-push ;
|
||||
|
||||
|
|
|
|||
|
|
@ -68,23 +68,23 @@ PRIVATE>
|
|||
tri
|
||||
] with-compilation-unit
|
||||
|
||||
: each-def-rep ( ... insn vreg-quot: ( ... vreg rep -- ... ) -- ... )
|
||||
: each-def-rep ( insn vreg-quot: ( vreg rep -- ) -- )
|
||||
[ [ defs-vreg ] [ defs-vreg-rep ] bi ] dip with when* ; inline
|
||||
|
||||
: each-use-rep ( ... insn vreg-quot: ( ... vreg rep -- ... ) -- ... )
|
||||
: each-use-rep ( insn vreg-quot: ( vreg rep -- ) -- )
|
||||
[ [ uses-vregs ] [ uses-vreg-reps ] bi ] dip 2each ; inline
|
||||
|
||||
: each-temp-rep ( ... insn vreg-quot: ( ... vreg rep -- ... ) -- ... )
|
||||
: each-temp-rep ( insn vreg-quot: ( vreg rep -- ) -- )
|
||||
[ [ temp-vregs ] [ temp-vreg-reps ] bi ] dip 2each ; inline
|
||||
|
||||
: each-rep ( insn vreg-quot: ( vreg rep -- ) -- )
|
||||
[ each-def-rep ] [ each-use-rep ] [ each-temp-rep ] 2tri ; inline
|
||||
|
||||
: with-vreg-reps ( ..a cfg vreg-quot: ( ..a vreg rep -- ..b ) -- ..b )
|
||||
'[
|
||||
[ basic-block set ] [
|
||||
[
|
||||
_
|
||||
[ each-def-rep ]
|
||||
[ each-use-rep ]
|
||||
[ each-temp-rep ] 2tri
|
||||
_ each-rep
|
||||
] each-non-phi
|
||||
] bi
|
||||
] each-basic-block ; inline
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
USING: tools.test cpu.architecture
|
||||
compiler.cfg.registers compiler.cfg.instructions
|
||||
compiler.cfg.representations.preferred ;
|
||||
USING: accessors compiler.cfg compiler.cfg.debugger
|
||||
compiler.cfg.instructions compiler.cfg.registers
|
||||
compiler.cfg.representations.preferred cpu.architecture kernel
|
||||
namespaces tools.test sequences arrays system ;
|
||||
IN: compiler.cfg.representations
|
||||
|
||||
[ { double-rep double-rep } ] [
|
||||
|
|
@ -16,4 +17,111 @@ IN: compiler.cfg.representations
|
|||
{ dst 5 }
|
||||
{ src 3 }
|
||||
} defs-vreg-rep
|
||||
] unit-test
|
||||
] unit-test
|
||||
|
||||
: test-representations ( -- )
|
||||
cfg new 0 get >>entry dup cfg set select-representations drop ;
|
||||
|
||||
! Make sure cost calculation isn't completely wrong
|
||||
V{
|
||||
T{ ##prologue }
|
||||
T{ ##branch }
|
||||
} 0 test-bb
|
||||
|
||||
V{
|
||||
T{ ##peek f 1 D 0 }
|
||||
T{ ##peek f 2 D 1 }
|
||||
T{ ##add-float f 3 1 2 }
|
||||
T{ ##replace f 3 D 0 }
|
||||
T{ ##replace f 3 D 1 }
|
||||
T{ ##replace f 3 D 2 }
|
||||
T{ ##branch }
|
||||
} 1 test-bb
|
||||
|
||||
V{
|
||||
T{ ##epilogue }
|
||||
T{ ##return }
|
||||
} 2 test-bb
|
||||
|
||||
0 1 edge
|
||||
1 2 edge
|
||||
|
||||
[ ] [ test-representations ] unit-test
|
||||
|
||||
[ 1 ] [ 1 get instructions>> [ ##allot? ] count ] unit-test
|
||||
|
||||
cpu x86.32? [
|
||||
|
||||
! Make sure load-constant is converted into load-double
|
||||
V{
|
||||
T{ ##prologue }
|
||||
T{ ##branch }
|
||||
} 0 test-bb
|
||||
|
||||
V{
|
||||
T{ ##peek f 1 D 0 }
|
||||
T{ ##load-constant f 2 0.5 }
|
||||
T{ ##add-float f 3 1 2 }
|
||||
T{ ##replace f 3 D 0 }
|
||||
T{ ##branch }
|
||||
} 1 test-bb
|
||||
|
||||
V{
|
||||
T{ ##epilogue }
|
||||
T{ ##return }
|
||||
} 2 test-bb
|
||||
|
||||
0 1 edge
|
||||
1 2 edge
|
||||
|
||||
[ ] [ test-representations ] unit-test
|
||||
|
||||
[ t ] [ 1 get instructions>> second ##load-double? ] unit-test
|
||||
|
||||
! Make sure phi nodes are handled in a sane way
|
||||
V{
|
||||
T{ ##prologue }
|
||||
T{ ##branch }
|
||||
} 0 test-bb
|
||||
|
||||
V{
|
||||
T{ ##peek f 1 D 0 }
|
||||
T{ ##compare-imm-branch f 1 2 }
|
||||
} 1 test-bb
|
||||
|
||||
V{
|
||||
T{ ##load-constant f 2 1.5 }
|
||||
T{ ##branch }
|
||||
} 2 test-bb
|
||||
|
||||
V{
|
||||
T{ ##load-constant f 3 2.5 }
|
||||
T{ ##branch }
|
||||
} 3 test-bb
|
||||
|
||||
V{
|
||||
T{ ##phi f 4 }
|
||||
T{ ##peek f 5 D 0 }
|
||||
T{ ##add-float f 6 4 5 }
|
||||
T{ ##replace f 6 D 0 }
|
||||
} 4 test-bb
|
||||
|
||||
V{
|
||||
T{ ##epilogue }
|
||||
T{ ##return }
|
||||
} 5 test-bb
|
||||
|
||||
test-diamond
|
||||
4 5 edge
|
||||
|
||||
2 get 2 2array
|
||||
3 get 3 2array 2array 4 get instructions>> first (>>inputs)
|
||||
|
||||
[ ] [ test-representations ] unit-test
|
||||
|
||||
[ t ] [ 2 get instructions>> first ##load-double? ] unit-test
|
||||
|
||||
[ t ] [ 3 get instructions>> first ##load-double? ] unit-test
|
||||
|
||||
[ t ] [ 4 get instructions>> first ##phi? ] unit-test
|
||||
] when
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
! Copyright (C) 2009 Slava Pestov
|
||||
! Copyright (C) 2009, 2010 Slava Pestov
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel fry accessors sequences assocs sets namespaces
|
||||
arrays combinators combinators.short-circuit math make locals
|
||||
|
|
@ -91,8 +91,8 @@ SYMBOL: possibilities
|
|||
: possible ( vreg -- reps ) possibilities get at ;
|
||||
|
||||
: compute-possibilities ( cfg -- )
|
||||
H{ } clone [ '[ swap _ conjoin-at ] with-vreg-reps ] keep
|
||||
[ keys ] assoc-map possibilities set ;
|
||||
H{ } clone [ '[ swap _ adjoin-at ] with-vreg-reps ] keep
|
||||
[ members ] assoc-map possibilities set ;
|
||||
|
||||
! Compute vregs which must remain tagged for their lifetime.
|
||||
SYMBOL: always-boxed
|
||||
|
|
@ -119,15 +119,18 @@ SYMBOL: always-boxed
|
|||
SYMBOL: costs
|
||||
|
||||
: init-costs ( -- )
|
||||
possibilities get [ [ 0 ] H{ } map>assoc ] assoc-map costs set ;
|
||||
possibilities get [ drop H{ } clone ] assoc-map costs set ;
|
||||
|
||||
: record-possibility ( rep vreg -- )
|
||||
costs get at [ 0 or ] change-at ;
|
||||
|
||||
: increase-cost ( rep vreg -- )
|
||||
! Increase cost of keeping vreg in rep, making a choice of rep less
|
||||
! likely.
|
||||
[ basic-block get loop-nesting-at ] 2dip costs get at at+ ;
|
||||
costs get at [ 0 or basic-block get loop-nesting-at 1 + + ] change-at ;
|
||||
|
||||
: maybe-increase-cost ( possible vreg preferred -- )
|
||||
pick eq? [ 2drop ] [ increase-cost ] if ;
|
||||
pick eq? [ record-possibility ] [ increase-cost ] if ;
|
||||
|
||||
: representation-cost ( vreg preferred -- )
|
||||
! 'preferred' is a representation that the instruction can accept with no cost.
|
||||
|
|
@ -137,11 +140,29 @@ SYMBOL: costs
|
|||
[ '[ _ _ maybe-increase-cost ] ]
|
||||
2bi each ;
|
||||
|
||||
GENERIC: compute-insn-costs ( insn -- )
|
||||
|
||||
M: ##load-constant compute-insn-costs
|
||||
! There's no cost to unboxing the result of a ##load-constant
|
||||
drop ;
|
||||
|
||||
M: insn compute-insn-costs [ representation-cost ] each-rep ;
|
||||
|
||||
: compute-costs ( cfg -- costs )
|
||||
init-costs [ representation-cost ] with-vreg-reps costs get ;
|
||||
init-costs
|
||||
[
|
||||
[ basic-block set ]
|
||||
[
|
||||
[
|
||||
compute-insn-costs
|
||||
] each-non-phi
|
||||
] bi
|
||||
] each-basic-block
|
||||
costs get ;
|
||||
|
||||
! For every vreg, compute preferred representation, that minimizes costs.
|
||||
: minimize-costs ( costs -- representations )
|
||||
[ nip assoc-empty? not ] assoc-filter
|
||||
[ >alist alist-min first ] assoc-map ;
|
||||
|
||||
: compute-representations ( cfg -- )
|
||||
|
|
@ -150,6 +171,54 @@ SYMBOL: costs
|
|||
bi assoc-union
|
||||
representations set ;
|
||||
|
||||
! PHI nodes require special treatment
|
||||
! If the output of a phi instruction is only used as the input to another
|
||||
! phi instruction, then we want to use the same representation for both
|
||||
! if possible.
|
||||
SYMBOL: phis
|
||||
|
||||
: collect-phis ( cfg -- )
|
||||
H{ } clone phis set
|
||||
[
|
||||
phis get
|
||||
'[ [ inputs>> values ] [ dst>> ] bi _ set-at ] each-phi
|
||||
] each-basic-block ;
|
||||
|
||||
SYMBOL: work-list
|
||||
|
||||
: add-to-work-list ( vregs -- )
|
||||
work-list get push-all-front ;
|
||||
|
||||
: rep-assigned ( vregs -- vregs' )
|
||||
representations get '[ _ key? ] filter ;
|
||||
|
||||
: rep-not-assigned ( vregs -- vregs' )
|
||||
representations get '[ _ key? not ] filter ;
|
||||
|
||||
: add-ready-phis ( -- )
|
||||
phis get keys rep-assigned add-to-work-list ;
|
||||
|
||||
: process-phi ( dst -- )
|
||||
! If dst = phi(src1,src2,...) and dst's representation has been
|
||||
! determined, assign that representation to each one of src1,...
|
||||
! that does not have a representation yet, and process those, too.
|
||||
dup phis get at* [
|
||||
[ rep-of ] [ rep-not-assigned ] bi*
|
||||
[ [ set-rep-of ] with each ] [ add-to-work-list ] bi
|
||||
] [ 2drop ] if ;
|
||||
|
||||
: remaining-phis ( -- )
|
||||
phis get keys rep-not-assigned { } assert-sequence= ;
|
||||
|
||||
: process-phis ( -- )
|
||||
<hashed-dlist> work-list set
|
||||
add-ready-phis
|
||||
work-list get [ process-phi ] slurp-deque
|
||||
remaining-phis ;
|
||||
|
||||
: compute-phi-representations ( cfg -- )
|
||||
collect-phis process-phis ;
|
||||
|
||||
! Insert conversions. This introduces new temporaries, so we need
|
||||
! to rename opearands too.
|
||||
|
||||
|
|
@ -188,7 +257,7 @@ SYMBOLS: renaming-set needs-renaming? ;
|
|||
: record-renaming ( from to -- )
|
||||
2array renaming-set get push needs-renaming? on ;
|
||||
|
||||
:: (compute-renaming-set) ( ..a vreg required quot: ( ..a vreg preferred required -- ..b ) -- ..b )
|
||||
:: (compute-renaming-set) ( vreg required quot: ( vreg preferred required -- new-vreg ) -- )
|
||||
vreg rep-of :> preferred
|
||||
preferred required eq?
|
||||
[ vreg no-renaming ]
|
||||
|
|
@ -217,15 +286,16 @@ RENAMING: convert [ converted-value ] [ converted-value ] [ ]
|
|||
|
||||
GENERIC: conversions-for-insn ( insn -- )
|
||||
|
||||
SYMBOL: phi-mappings
|
||||
M: ##phi conversions-for-insn , ;
|
||||
|
||||
! compiler.cfg.cssa inserts conversions which convert phi inputs into
|
||||
! the representation of the output. However, we still have to do some
|
||||
! processing here, because if the only node that uses the output of
|
||||
! the phi instruction is another phi instruction then this phi node's
|
||||
! output won't have a representation assigned.
|
||||
M: ##phi conversions-for-insn
|
||||
[ , ] [ [ inputs>> values ] [ dst>> ] bi phi-mappings get set-at ] bi ;
|
||||
! When a float is unboxed, we replace the ##load-constant with a ##load-double
|
||||
! if the architecture supports it
|
||||
: convert-to-load-double? ( insn -- ? )
|
||||
{
|
||||
[ drop load-double? ]
|
||||
[ dst>> rep-of double-rep? ]
|
||||
[ obj>> float? ]
|
||||
} 1&& ;
|
||||
|
||||
! When a literal zeroes/ones vector is unboxed, we replace the ##load-reference
|
||||
! with a ##zero-vector or ##fill-vector instruction since this is more efficient.
|
||||
|
|
@ -234,17 +304,25 @@ M: ##phi conversions-for-insn
|
|||
[ dst>> rep-of vector-rep? ]
|
||||
[ obj>> B{ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 } = ]
|
||||
} 1&& ;
|
||||
|
||||
: convert-to-fill-vector? ( insn -- ? )
|
||||
{
|
||||
[ dst>> rep-of vector-rep? ]
|
||||
[ obj>> B{ 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 } = ]
|
||||
} 1&& ;
|
||||
|
||||
: (convert-to-load-double) ( insn -- dst val )
|
||||
[ dst>> ] [ obj>> ] bi ; inline
|
||||
|
||||
: (convert-to-zero/fill-vector) ( insn -- dst rep )
|
||||
dst>> dup rep-of ; inline
|
||||
|
||||
: conversions-for-load-insn ( insn -- ?insn )
|
||||
{
|
||||
{
|
||||
[ dup convert-to-load-double? ]
|
||||
[ (convert-to-load-double) ##load-double f ]
|
||||
}
|
||||
{
|
||||
[ dup convert-to-zero-vector? ]
|
||||
[ (convert-to-zero/fill-vector) ##zero-vector f ]
|
||||
|
|
@ -277,46 +355,8 @@ M: insn conversions-for-insn , ;
|
|||
] change-instructions drop
|
||||
] if ;
|
||||
|
||||
! If the output of a phi instruction is only used as the input to another
|
||||
! phi instruction, then we want to use the same representation for both
|
||||
! if possible.
|
||||
SYMBOL: work-list
|
||||
|
||||
: add-to-work-list ( vregs -- )
|
||||
work-list get push-all-front ;
|
||||
|
||||
: rep-assigned ( vregs -- vregs' )
|
||||
representations get '[ _ key? ] filter ;
|
||||
|
||||
: rep-not-assigned ( vregs -- vregs' )
|
||||
representations get '[ _ key? not ] filter ;
|
||||
|
||||
: add-ready-phis ( -- )
|
||||
phi-mappings get keys rep-assigned add-to-work-list ;
|
||||
|
||||
: process-phi-mapping ( dst -- )
|
||||
! If dst = phi(src1,src2,...) and dst's representation has been
|
||||
! determined, assign that representation to each one of src1,...
|
||||
! that does not have a representation yet, and process those, too.
|
||||
dup phi-mappings get at* [
|
||||
[ rep-of ] [ rep-not-assigned ] bi*
|
||||
[ [ set-rep-of ] with each ] [ add-to-work-list ] bi
|
||||
] [ 2drop ] if ;
|
||||
|
||||
: remaining-phi-mappings ( -- )
|
||||
phi-mappings get keys rep-not-assigned
|
||||
[ [ int-rep ] dip set-rep-of ] each ;
|
||||
|
||||
: process-phi-mappings ( -- )
|
||||
<hashed-dlist> work-list set
|
||||
add-ready-phis
|
||||
work-list get [ process-phi-mapping ] slurp-deque
|
||||
remaining-phi-mappings ;
|
||||
|
||||
: insert-conversions ( cfg -- )
|
||||
H{ } clone phi-mappings set
|
||||
[ conversions-for-block ] each-basic-block
|
||||
process-phi-mappings ;
|
||||
[ conversions-for-block ] each-basic-block ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
|
|
@ -326,6 +366,7 @@ PRIVATE>
|
|||
{
|
||||
[ compute-possibilities ]
|
||||
[ compute-representations ]
|
||||
[ compute-phi-representations ]
|
||||
[ insert-conversions ]
|
||||
[ ]
|
||||
} cleave
|
||||
|
|
|
|||
|
|
@ -27,6 +27,12 @@ IN: compiler.cfg.value-numbering.rewrite
|
|||
[ value>> immediate-bitwise? ]
|
||||
} 1&& ;
|
||||
|
||||
: vreg-immediate-comparand? ( vreg -- ? )
|
||||
vreg>expr {
|
||||
[ constant-expr? ]
|
||||
[ value>> immediate-comparand? ]
|
||||
} 1&& ;
|
||||
|
||||
! Outputs f to mean no change
|
||||
|
||||
GENERIC: rewrite ( insn -- insn/f )
|
||||
|
|
@ -35,10 +41,7 @@ M: insn rewrite drop f ;
|
|||
|
||||
: ##branch-t? ( insn -- ? )
|
||||
dup ##compare-imm-branch? [
|
||||
{
|
||||
[ cc>> cc/= eq? ]
|
||||
[ src2>> \ f type-number eq? ]
|
||||
} 1&&
|
||||
{ [ cc>> cc/= eq? ] [ src2>> not ] } 1&&
|
||||
] [ drop f ] if ; inline
|
||||
|
||||
: general-compare-expr? ( insn -- ? )
|
||||
|
|
@ -118,8 +121,8 @@ M: ##compare-imm rewrite-tagged-comparison
|
|||
: rewrite-redundant-comparison? ( insn -- ? )
|
||||
{
|
||||
[ src1>> vreg>expr general-compare-expr? ]
|
||||
[ src2>> \ f type-number = ]
|
||||
[ cc>> { cc= cc/= } member-eq? ]
|
||||
[ src2>> not ]
|
||||
[ cc>> { cc= cc/= } member? ]
|
||||
} 1&& ; inline
|
||||
|
||||
: rewrite-redundant-comparison ( insn -- insn' )
|
||||
|
|
@ -131,17 +134,12 @@ M: ##compare-imm rewrite-tagged-comparison
|
|||
} cond
|
||||
swap cc= eq? [ [ negate-cc ] change-cc ] when ;
|
||||
|
||||
ERROR: bad-comparison ;
|
||||
|
||||
: (fold-compare-imm) ( insn -- ? )
|
||||
[ [ src1>> vreg>constant ] [ src2>> ] bi ] [ cc>> ] bi
|
||||
pick integer?
|
||||
[ [ <=> ] dip evaluate-cc ]
|
||||
[
|
||||
2nip {
|
||||
{ cc= [ f ] }
|
||||
{ cc/= [ t ] }
|
||||
[ bad-comparison ]
|
||||
[ src1>> vreg>constant ] [ src2>> ] [ cc>> ] tri
|
||||
2over [ integer? ] both? [ [ <=> ] dip evaluate-cc ] [
|
||||
{
|
||||
{ cc= [ eq? ] }
|
||||
{ cc/= [ eq? not ] }
|
||||
} case
|
||||
] if ;
|
||||
|
||||
|
|
@ -189,8 +187,8 @@ M: ##compare-imm-branch rewrite
|
|||
|
||||
M: ##compare-branch rewrite
|
||||
{
|
||||
{ [ dup src1>> vreg-immediate-arithmetic? ] [ t >compare-imm-branch ] }
|
||||
{ [ dup src2>> vreg-immediate-arithmetic? ] [ f >compare-imm-branch ] }
|
||||
{ [ dup src1>> vreg-immediate-comparand? ] [ t >compare-imm-branch ] }
|
||||
{ [ dup src2>> vreg-immediate-comparand? ] [ f >compare-imm-branch ] }
|
||||
{ [ dup self-compare? ] [ rewrite-self-compare-branch ] }
|
||||
[ drop f ]
|
||||
} cond ;
|
||||
|
|
@ -209,19 +207,15 @@ M: ##compare-branch rewrite
|
|||
next-vreg \ ##compare-imm new-insn ; inline
|
||||
|
||||
: >boolean-insn ( insn ? -- insn' )
|
||||
[ dst>> ] dip
|
||||
{
|
||||
{ t [ t \ ##load-constant new-insn ] }
|
||||
{ f [ \ f type-number \ ##load-immediate new-insn ] }
|
||||
} case ;
|
||||
[ dst>> ] dip \ ##load-constant new-insn ;
|
||||
|
||||
: rewrite-self-compare ( insn -- insn' )
|
||||
dup (rewrite-self-compare) >boolean-insn ;
|
||||
|
||||
M: ##compare rewrite
|
||||
{
|
||||
{ [ dup src1>> vreg-immediate-arithmetic? ] [ t >compare-imm ] }
|
||||
{ [ dup src2>> vreg-immediate-arithmetic? ] [ f >compare-imm ] }
|
||||
{ [ dup src1>> vreg-immediate-comparand? ] [ t >compare-imm ] }
|
||||
{ [ dup src2>> vreg-immediate-comparand? ] [ f >compare-imm ] }
|
||||
{ [ dup self-compare? ] [ rewrite-self-compare ] }
|
||||
[ drop f ]
|
||||
} cond ;
|
||||
|
|
@ -254,7 +248,12 @@ M: ##shl-imm constant-fold* drop shift ;
|
|||
|
||||
: constant-fold ( insn -- insn' )
|
||||
[ dst>> ]
|
||||
[ [ src1>> vreg>constant ] [ src2>> ] [ ] tri constant-fold* ] bi
|
||||
[
|
||||
[ src1>> vreg>constant \ f type-number or ]
|
||||
[ src2>> ]
|
||||
[ ]
|
||||
tri constant-fold*
|
||||
] bi
|
||||
\ ##load-immediate new-insn ; inline
|
||||
|
||||
: unary-constant-fold? ( insn -- ? )
|
||||
|
|
@ -380,7 +379,7 @@ M: ##sar-imm rewrite
|
|||
[ drop f ]
|
||||
} cond ;
|
||||
|
||||
: insn>imm-insn ( insn op swap? -- )
|
||||
: insn>imm-insn ( insn op swap? -- new-insn )
|
||||
swap [
|
||||
[ [ dst>> ] [ src1>> ] [ src2>> ] tri ] dip
|
||||
[ swap ] when vreg>constant
|
||||
|
|
@ -390,13 +389,13 @@ M: ##sar-imm rewrite
|
|||
arithmetic-op?
|
||||
[ vreg-immediate-arithmetic? ] [ vreg-immediate-bitwise? ] if ;
|
||||
|
||||
: rewrite-arithmetic ( insn op -- ? )
|
||||
: rewrite-arithmetic ( insn op -- insn/f )
|
||||
{
|
||||
{ [ over src2>> over vreg-immediate? ] [ f insn>imm-insn ] }
|
||||
[ 2drop f ]
|
||||
} cond ; inline
|
||||
|
||||
: rewrite-arithmetic-commutative ( insn op -- ? )
|
||||
: rewrite-arithmetic-commutative ( insn op -- insn/f )
|
||||
{
|
||||
{ [ over src2>> over vreg-immediate? ] [ f insn>imm-insn ] }
|
||||
{ [ over src1>> over vreg-immediate? ] [ t insn>imm-insn ] }
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@ cpu.architecture tools.test kernel math combinators.short-circuit
|
|||
accessors sequences compiler.cfg.predecessors locals compiler.cfg.dce
|
||||
compiler.cfg.ssa.destruction compiler.cfg.loop-detection
|
||||
compiler.cfg.representations compiler.cfg assocs vectors arrays
|
||||
layouts literals namespaces alien compiler.cfg.value-numbering.simd ;
|
||||
layouts literals namespaces alien compiler.cfg.value-numbering.simd
|
||||
system ;
|
||||
IN: compiler.cfg.value-numbering.tests
|
||||
|
||||
: trim-temps ( insns -- insns )
|
||||
|
|
@ -82,7 +83,7 @@ IN: compiler.cfg.value-numbering.tests
|
|||
T{ ##load-reference f 1 + }
|
||||
T{ ##peek f 2 D 0 }
|
||||
T{ ##compare f 4 2 1 cc> }
|
||||
T{ ##compare-imm f 6 4 $[ \ f type-number ] cc/= }
|
||||
T{ ##compare-imm f 6 4 f cc/= }
|
||||
T{ ##replace f 6 D 0 }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
|
@ -100,7 +101,7 @@ IN: compiler.cfg.value-numbering.tests
|
|||
T{ ##load-reference f 1 + }
|
||||
T{ ##peek f 2 D 0 }
|
||||
T{ ##compare f 4 2 1 cc<= }
|
||||
T{ ##compare-imm f 6 4 $[ \ f type-number ] cc= }
|
||||
T{ ##compare-imm f 6 4 f cc= }
|
||||
T{ ##replace f 6 D 0 }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
|
@ -118,7 +119,7 @@ IN: compiler.cfg.value-numbering.tests
|
|||
T{ ##peek f 8 D 0 }
|
||||
T{ ##peek f 9 D -1 }
|
||||
T{ ##compare-float-unordered f 12 8 9 cc< }
|
||||
T{ ##compare-imm f 14 12 $[ \ f type-number ] cc= }
|
||||
T{ ##compare-imm f 14 12 f cc= }
|
||||
T{ ##replace f 14 D 0 }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
|
@ -135,7 +136,7 @@ IN: compiler.cfg.value-numbering.tests
|
|||
T{ ##peek f 29 D -1 }
|
||||
T{ ##peek f 30 D -2 }
|
||||
T{ ##compare f 33 29 30 cc<= }
|
||||
T{ ##compare-imm-branch f 33 $[ \ f type-number ] cc/= }
|
||||
T{ ##compare-imm-branch f 33 f cc/= }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
|
|
@ -149,7 +150,7 @@ IN: compiler.cfg.value-numbering.tests
|
|||
{
|
||||
T{ ##peek f 1 D -1 }
|
||||
T{ ##test-vector f 2 1 f float-4-rep vcc-any }
|
||||
T{ ##compare-imm-branch f 2 $[ \ f type-number ] cc/= }
|
||||
T{ ##compare-imm-branch f 2 f cc/= }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
|
|
@ -418,6 +419,36 @@ IN: compiler.cfg.value-numbering.tests
|
|||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
cpu x86.32? [
|
||||
[
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##load-constant f 1 + }
|
||||
T{ ##compare-imm f 2 0 + cc= }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##load-constant f 1 + }
|
||||
T{ ##compare f 2 0 1 cc= }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##load-constant f 1 + }
|
||||
T{ ##compare-imm-branch f 0 + cc= }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##load-constant f 1 + }
|
||||
T{ ##compare-branch f 0 1 cc= }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
] when
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
|
|
@ -432,6 +463,20 @@ IN: compiler.cfg.value-numbering.tests
|
|||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##load-constant f 1 3.5 }
|
||||
T{ ##compare-branch f 0 1 cc= }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##load-constant f 1 3.5 }
|
||||
T{ ##compare-branch f 0 1 cc= }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
|
|
@ -460,20 +505,6 @@ IN: compiler.cfg.value-numbering.tests
|
|||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##load-constant f 1 3.5 }
|
||||
T{ ##compare-branch f 0 1 cc= }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##load-constant f 1 3.5 }
|
||||
T{ ##compare-branch f 0 1 cc= }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
|
|
@ -488,6 +519,59 @@ IN: compiler.cfg.value-numbering.tests
|
|||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
! Branch folding
|
||||
[
|
||||
{
|
||||
T{ ##load-immediate f 1 100 }
|
||||
T{ ##load-immediate f 2 200 }
|
||||
T{ ##load-constant f 3 t }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##load-immediate f 1 100 }
|
||||
T{ ##load-immediate f 2 200 }
|
||||
T{ ##compare f 3 1 2 cc<= }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##load-immediate f 1 100 }
|
||||
T{ ##load-immediate f 2 200 }
|
||||
T{ ##load-constant f 3 f }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##load-immediate f 1 100 }
|
||||
T{ ##load-immediate f 2 200 }
|
||||
T{ ##compare f 3 1 2 cc= }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##load-immediate f 1 100 }
|
||||
T{ ##load-constant f 2 f }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##load-immediate f 1 100 }
|
||||
T{ ##compare-imm f 2 1 f cc= }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##load-constant f 1 f }
|
||||
T{ ##load-constant f 2 t }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##load-constant f 1 f }
|
||||
T{ ##compare-imm f 2 1 f cc= }
|
||||
} value-numbering-step trim-temps
|
||||
] unit-test
|
||||
|
||||
! Reassociation
|
||||
[
|
||||
{
|
||||
|
|
@ -1011,6 +1095,19 @@ cell 8 = [
|
|||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
! Stupid constant folding corner case
|
||||
[
|
||||
{
|
||||
T{ ##load-constant f 1 f }
|
||||
T{ ##load-immediate f 2 $[ \ f type-number ] }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##load-constant f 1 f }
|
||||
T{ ##and-imm f 2 1 15 }
|
||||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
! Displaced alien optimizations
|
||||
3 vreg-counter set-global
|
||||
|
||||
|
|
@ -1073,7 +1170,7 @@ cell 8 = [
|
|||
{
|
||||
T{ ##load-immediate f 1 10 }
|
||||
T{ ##load-immediate f 2 20 }
|
||||
T{ ##load-immediate f 3 $[ \ f type-number ] }
|
||||
T{ ##load-constant f 3 f }
|
||||
}
|
||||
] [
|
||||
{
|
||||
|
|
@ -1115,7 +1212,7 @@ cell 8 = [
|
|||
{
|
||||
T{ ##load-immediate f 1 10 }
|
||||
T{ ##load-immediate f 2 20 }
|
||||
T{ ##load-immediate f 3 $[ \ f type-number ] }
|
||||
T{ ##load-constant f 3 f }
|
||||
}
|
||||
] [
|
||||
{
|
||||
|
|
@ -1128,7 +1225,7 @@ cell 8 = [
|
|||
[
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##load-immediate f 1 $[ \ f type-number ] }
|
||||
T{ ##load-constant f 1 f }
|
||||
}
|
||||
] [
|
||||
{
|
||||
|
|
@ -1152,7 +1249,7 @@ cell 8 = [
|
|||
[
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##load-immediate f 1 $[ \ f type-number ] }
|
||||
T{ ##load-constant f 1 f }
|
||||
}
|
||||
] [
|
||||
{
|
||||
|
|
@ -1176,7 +1273,7 @@ cell 8 = [
|
|||
[
|
||||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##load-immediate f 1 $[ \ f type-number ] }
|
||||
T{ ##load-constant f 1 f }
|
||||
}
|
||||
] [
|
||||
{
|
||||
|
|
@ -1557,7 +1654,7 @@ cell 8 = [
|
|||
{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##compare f 1 0 0 cc<= }
|
||||
T{ ##compare-imm-branch f 1 $[ \ f type-number ] cc/= }
|
||||
T{ ##compare-imm-branch f 1 f cc/= }
|
||||
} test-branch-folding
|
||||
] unit-test
|
||||
|
||||
|
|
@ -1659,7 +1756,7 @@ V{
|
|||
T{ ##copy { dst 21 } { src 20 } { rep any-rep } }
|
||||
T{ ##compare-imm-branch
|
||||
{ src1 21 }
|
||||
{ src2 $[ \ f type-number ] }
|
||||
{ src2 f }
|
||||
{ cc cc/= }
|
||||
}
|
||||
} 1 test-bb
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ SYNTAX: CODEGEN:
|
|||
CODEGEN: ##load-immediate %load-immediate
|
||||
CODEGEN: ##load-reference %load-reference
|
||||
CODEGEN: ##load-constant %load-reference
|
||||
CODEGEN: ##load-double %load-double
|
||||
CODEGEN: ##peek %peek
|
||||
CODEGEN: ##replace %replace
|
||||
CODEGEN: ##inc-d %inc-d
|
||||
|
|
|
|||
|
|
@ -70,9 +70,12 @@ MEMO: cached-string>symbol ( symbol -- obj ) string>symbol ;
|
|||
: rel-word-pic-tail ( word class -- )
|
||||
[ add-literal ] dip rt-entry-point-pic-tail rel-fixup ;
|
||||
|
||||
: rel-immediate ( literal class -- )
|
||||
: rel-literal ( literal class -- )
|
||||
[ add-literal ] dip rt-literal rel-fixup ;
|
||||
|
||||
: rel-float ( literal class -- )
|
||||
[ add-literal ] dip rt-float rel-fixup ;
|
||||
|
||||
: rel-this ( class -- )
|
||||
rt-this rel-fixup ;
|
||||
|
||||
|
|
|
|||
|
|
@ -68,7 +68,8 @@ C-ENUM: f
|
|||
rt-vm
|
||||
rt-cards-offset
|
||||
rt-decks-offset
|
||||
rt-exception-handler ;
|
||||
rt-exception-handler
|
||||
rt-float ;
|
||||
|
||||
: rc-absolute? ( n -- ? )
|
||||
${
|
||||
|
|
|
|||
|
|
@ -33,10 +33,10 @@ IN: compiler.tests.low-level-ir
|
|||
compile-test-cfg
|
||||
execute( -- result ) ;
|
||||
|
||||
! loading immediates
|
||||
! loading constants
|
||||
[ f ] [
|
||||
V{
|
||||
T{ ##load-immediate f 0 $[ \ f type-number ] }
|
||||
T{ ##load-constant f 0 f }
|
||||
} compile-test-bb
|
||||
] unit-test
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ IN: compiler.tree.propagation.recursive.tests
|
|||
integer generalize-counter-interval
|
||||
] unit-test
|
||||
|
||||
[ T{ interval f { 0 t } { $[ most-positive-fixnum ] t } } ] [
|
||||
[ T{ interval f { 0 t } { $[ max-array-capacity ] t } } ] [
|
||||
T{ interval f { 1 t } { 1 t } }
|
||||
T{ interval f { 0 t } { 0 t } }
|
||||
fixnum generalize-counter-interval
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2008, 2009 Slava Pestov.
|
||||
! Copyright (C) 2008, 2010 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel sequences accessors arrays fry math math.intervals
|
||||
layouts combinators namespaces locals
|
||||
USING: kernel classes.algebra sequences accessors arrays fry
|
||||
math math.intervals layouts combinators namespaces locals
|
||||
stack-checker.inlining
|
||||
compiler.tree
|
||||
compiler.tree.combinators
|
||||
|
|
@ -11,6 +11,7 @@ compiler.tree.propagation.nodes
|
|||
compiler.tree.propagation.simple
|
||||
compiler.tree.propagation.branches
|
||||
compiler.tree.propagation.constraints ;
|
||||
FROM: sequences.private => array-capacity ;
|
||||
IN: compiler.tree.propagation.recursive
|
||||
|
||||
: check-fixed-point ( node infos1 infos2 -- )
|
||||
|
|
@ -24,7 +25,14 @@ IN: compiler.tree.propagation.recursive
|
|||
[ label>> calls>> [ node>> node-input-infos ] map flip ]
|
||||
[ latest-input-infos ] bi ;
|
||||
|
||||
: counter-class ( interval class -- class' )
|
||||
dup fixnum class<= [
|
||||
swap array-capacity-interval interval-subset?
|
||||
[ drop array-capacity ] when
|
||||
] [ nip ] if ;
|
||||
|
||||
:: generalize-counter-interval ( interval initial-interval class -- interval' )
|
||||
interval class counter-class :> class
|
||||
{
|
||||
{ [ interval initial-interval interval-subset? ] [ initial-interval ] }
|
||||
{ [ interval empty-interval eq? ] [ initial-interval ] }
|
||||
|
|
|
|||
|
|
@ -1,12 +1,13 @@
|
|||
! Copyright (C) 2008, 2010 Slava Pestov, Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien.c-types kernel sequences words fry generic accessors
|
||||
classes.tuple classes classes.algebra definitions
|
||||
stack-checker.dependencies quotations classes.tuple.private math
|
||||
math.partial-dispatch math.private math.intervals sets.private
|
||||
math.floats.private math.integers.private layouts math.order
|
||||
vectors hashtables combinators effects generalizations assocs
|
||||
sets combinators.short-circuit sequences.private locals growable
|
||||
USING: alien.c-types kernel sequences words fry generic
|
||||
generic.single accessors classes.tuple classes classes.algebra
|
||||
definitions stack-checker.dependencies quotations
|
||||
classes.tuple.private math math.partial-dispatch math.private
|
||||
math.intervals sets.private math.floats.private
|
||||
math.integers.private layouts math.order vectors hashtables
|
||||
combinators effects generalizations assocs sets
|
||||
combinators.short-circuit sequences.private locals growable
|
||||
stack-checker namespaces compiler.tree.propagation.info ;
|
||||
FROM: math => float ;
|
||||
FROM: sets => set ;
|
||||
|
|
@ -299,6 +300,12 @@ M\ set intersect [ intersect-quot ] 1 define-partial-eval
|
|||
[ \ push def>> ] [ f ] if
|
||||
] "custom-inlining" set-word-prop
|
||||
|
||||
! Speeds up fasta benchmark
|
||||
\ >fixnum [
|
||||
in-d>> first value-info class>> fixnum \ f class-or class<=
|
||||
[ [ dup [ \ >fixnum no-method ] unless ] ] [ f ] if
|
||||
] "custom-inlining" set-word-prop
|
||||
|
||||
! We want to constant-fold calls to heap-size, and recompile those
|
||||
! calls when a C type is redefined
|
||||
\ heap-size [
|
||||
|
|
|
|||
|
|
@ -202,8 +202,9 @@ M: ulonglong-2-rep scalar-rep-of drop ulonglong-scalar-rep ;
|
|||
! Mapping from register class to machine registers
|
||||
HOOK: machine-registers cpu ( -- assoc )
|
||||
|
||||
HOOK: %load-immediate cpu ( reg obj -- )
|
||||
HOOK: %load-immediate cpu ( reg val -- )
|
||||
HOOK: %load-reference cpu ( reg obj -- )
|
||||
HOOK: %load-double cpu ( reg val -- )
|
||||
|
||||
HOOK: %peek cpu ( vreg loc -- )
|
||||
HOOK: %replace cpu ( vreg loc -- )
|
||||
|
|
@ -496,15 +497,32 @@ M: reg-class param-reg param-regs nth ;
|
|||
|
||||
M: stack-params param-reg 2drop ;
|
||||
|
||||
! Is this integer small enough to be an immediate operand for
|
||||
! %add-imm, %sub-imm, and %mul-imm?
|
||||
! Does this architecture support %load-double?
|
||||
HOOK: load-double? cpu ( -- ? )
|
||||
|
||||
M: object load-double? f ;
|
||||
|
||||
! Can this value be an immediate operand for %add-imm, %sub-imm,
|
||||
! or %mul-imm?
|
||||
HOOK: immediate-arithmetic? cpu ( n -- ? )
|
||||
|
||||
! Is this integer small enough to be an immediate operand for
|
||||
! %and-imm, %or-imm, and %xor-imm?
|
||||
! Can this value be an immediate operand for %and-imm, %or-imm,
|
||||
! or %xor-imm?
|
||||
HOOK: immediate-bitwise? cpu ( n -- ? )
|
||||
|
||||
! What c-type describes the implicit struct return pointer for large structs?
|
||||
! Can this value be an immediate operand for %compare-imm or
|
||||
! %compare-imm-branch?
|
||||
HOOK: immediate-comparand? cpu ( n -- ? )
|
||||
|
||||
M: object immediate-comparand? ( n -- ? )
|
||||
{
|
||||
{ [ dup integer? ] [ immediate-arithmetic? ] }
|
||||
{ [ dup not ] [ drop t ] }
|
||||
[ drop f ]
|
||||
} cond ;
|
||||
|
||||
! What c-type describes the implicit struct return pointer for
|
||||
! large structs?
|
||||
HOOK: struct-return-pointer-type cpu ( -- c-type )
|
||||
|
||||
! Is this structure small enough to be returned in registers?
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ CONSTANT: fp-scratch-reg 30
|
|||
M: ppc %load-immediate ( reg n -- ) swap LOAD ;
|
||||
|
||||
M: ppc %load-reference ( reg obj -- )
|
||||
[ 0 swap LOAD32 ] [ rc-absolute-ppc-2/2 rel-immediate ] bi* ;
|
||||
[ 0 swap LOAD32 ] [ rc-absolute-ppc-2/2 rel-literal ] bi* ;
|
||||
|
||||
M: ppc %alien-global ( register symbol dll -- )
|
||||
[ 0 swap LOAD32 ] 2dip rc-absolute-ppc-2/2 rel-dlsym ;
|
||||
|
|
@ -492,7 +492,7 @@ M: ppc %epilogue ( n -- )
|
|||
} case ;
|
||||
|
||||
: (%compare) ( src1 src2 -- ) [ 0 ] dip CMP ; inline
|
||||
: (%compare-imm) ( src1 src2 -- ) [ 0 ] 2dip CMPI ; inline
|
||||
: (%compare-imm) ( src1 src2 -- ) [ 0 ] [ ] [ \ f type-number or ] tri* CMPI ; inline
|
||||
: (%compare-float-unordered) ( src1 src2 -- ) [ 0 ] dip FCMPU ; inline
|
||||
: (%compare-float-ordered) ( src1 src2 -- ) [ 0 ] dip FCMPO ; inline
|
||||
|
||||
|
|
|
|||
|
|
@ -2,13 +2,13 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: locals alien alien.c-types alien.libraries alien.syntax
|
||||
arrays kernel fry math namespaces sequences system layouts io
|
||||
vocabs.loader accessors init classes.struct combinators command-line
|
||||
make compiler compiler.units compiler.constants compiler.alien
|
||||
compiler.codegen compiler.codegen.fixup
|
||||
compiler.cfg.instructions compiler.cfg.builder
|
||||
compiler.cfg.intrinsics compiler.cfg.stack-frame
|
||||
cpu.x86.assembler cpu.x86.assembler.operands cpu.x86
|
||||
cpu.architecture vm ;
|
||||
vocabs.loader accessors init classes.struct combinators
|
||||
command-line make words compiler compiler.units
|
||||
compiler.constants compiler.alien compiler.codegen
|
||||
compiler.codegen.fixup compiler.cfg.instructions
|
||||
compiler.cfg.builder compiler.cfg.intrinsics
|
||||
compiler.cfg.stack-frame cpu.x86.assembler
|
||||
cpu.x86.assembler.operands cpu.x86 cpu.architecture vm ;
|
||||
FROM: layouts => cell ;
|
||||
IN: cpu.x86.32
|
||||
|
||||
|
|
@ -24,6 +24,14 @@ M: x86.32 stack-reg ESP ;
|
|||
M: x86.32 frame-reg EBP ;
|
||||
M: x86.32 temp-reg ECX ;
|
||||
|
||||
M: x86.32 immediate-comparand? ( n -- ? )
|
||||
[ call-next-method ] [ word? ] bi or ;
|
||||
|
||||
M: x86.32 load-double? ( -- ? ) t ;
|
||||
|
||||
M: x86.32 %load-double ( dst val -- )
|
||||
[ 0 [] MOVSD ] dip rc-absolute rel-float ;
|
||||
|
||||
M: x86.32 %mov-vm-ptr ( reg -- )
|
||||
0 MOV 0 rc-absolute-cell rel-vm ;
|
||||
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ HOOK: pic-tail-reg cpu ( -- reg )
|
|||
|
||||
M: x86 %load-immediate dup 0 = [ drop dup XOR ] [ MOV ] if ;
|
||||
|
||||
M: x86 %load-reference swap 0 MOV rc-absolute-cell rel-immediate ;
|
||||
M: x86 %load-reference swap 0 MOV rc-absolute-cell rel-literal ;
|
||||
|
||||
HOOK: ds-reg cpu ( -- reg )
|
||||
HOOK: rs-reg cpu ( -- reg )
|
||||
|
|
@ -491,43 +491,60 @@ M: x86 %push-context-stack ( -- )
|
|||
|
||||
M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
|
||||
|
||||
:: %boolean ( dst temp word -- )
|
||||
:: (%boolean) ( dst temp insn -- )
|
||||
dst \ f type-number MOV
|
||||
temp 0 MOV \ t rc-absolute-cell rel-immediate
|
||||
dst temp word execute ; inline
|
||||
temp 0 MOV \ t rc-absolute-cell rel-literal
|
||||
dst temp insn execute ; inline
|
||||
|
||||
: (%compare) ( src1 src2 cc -- )
|
||||
2over [ { cc= cc/= } member? ] [ register? ] [ 0 = ] tri* and and
|
||||
[ drop dup TEST ]
|
||||
[ CMP ] if ;
|
||||
: %boolean ( dst cc temp -- )
|
||||
swap order-cc {
|
||||
{ cc< [ \ CMOVL (%boolean) ] }
|
||||
{ cc<= [ \ CMOVLE (%boolean) ] }
|
||||
{ cc> [ \ CMOVG (%boolean) ] }
|
||||
{ cc>= [ \ CMOVGE (%boolean) ] }
|
||||
{ cc= [ \ CMOVE (%boolean) ] }
|
||||
{ cc/= [ \ CMOVNE (%boolean) ] }
|
||||
} case ;
|
||||
|
||||
M:: x86 %compare ( dst src1 src2 cc temp -- )
|
||||
src1 src2 cc (%compare)
|
||||
cc order-cc {
|
||||
{ cc< [ dst temp \ CMOVL %boolean ] }
|
||||
{ cc<= [ dst temp \ CMOVLE %boolean ] }
|
||||
{ cc> [ dst temp \ CMOVG %boolean ] }
|
||||
{ cc>= [ dst temp \ CMOVGE %boolean ] }
|
||||
{ cc= [ dst temp \ CMOVE %boolean ] }
|
||||
{ cc/= [ dst temp \ CMOVNE %boolean ] }
|
||||
} case ;
|
||||
src1 src2 CMP
|
||||
dst cc temp %boolean ;
|
||||
|
||||
M: x86 %compare-imm ( dst src1 src2 cc temp -- )
|
||||
%compare ;
|
||||
: use-test? ( src1 src2 cc -- ? )
|
||||
[ register? ] [ 0 = ] [ { cc= cc/= } member? ] tri* and and ;
|
||||
|
||||
: (%compare-tagged) ( src1 src2 -- )
|
||||
[ HEX: ffffffff CMP ] dip rc-absolute rel-literal ;
|
||||
|
||||
: (%compare-imm) ( src1 src2 cc -- )
|
||||
{
|
||||
{ [ 3dup use-test? ] [ 2drop dup TEST ] }
|
||||
{ [ over integer? ] [ drop CMP ] }
|
||||
{ [ over word? ] [ drop (%compare-tagged) ] }
|
||||
{ [ over not ] [ 2drop \ f type-number CMP ] }
|
||||
} cond ;
|
||||
|
||||
M:: x86 %compare-imm ( dst src1 src2 cc temp -- )
|
||||
src1 src2 cc (%compare-imm)
|
||||
dst cc temp %boolean ;
|
||||
|
||||
: %branch ( label cc -- )
|
||||
order-cc {
|
||||
{ cc< [ JL ] }
|
||||
{ cc<= [ JLE ] }
|
||||
{ cc> [ JG ] }
|
||||
{ cc>= [ JGE ] }
|
||||
{ cc= [ JE ] }
|
||||
{ cc/= [ JNE ] }
|
||||
} case ;
|
||||
|
||||
M:: x86 %compare-branch ( label src1 src2 cc -- )
|
||||
src1 src2 cc (%compare)
|
||||
cc order-cc {
|
||||
{ cc< [ label JL ] }
|
||||
{ cc<= [ label JLE ] }
|
||||
{ cc> [ label JG ] }
|
||||
{ cc>= [ label JGE ] }
|
||||
{ cc= [ label JE ] }
|
||||
{ cc/= [ label JNE ] }
|
||||
} case ;
|
||||
src1 src2 CMP
|
||||
label cc %branch ;
|
||||
|
||||
M: x86 %compare-imm-branch ( label src1 src2 cc -- )
|
||||
%compare-branch ;
|
||||
M:: x86 %compare-imm-branch ( label src1 src2 cc -- )
|
||||
src1 src2 cc (%compare-imm)
|
||||
label cc %branch ;
|
||||
|
||||
M: x86 %add-float double-rep two-operand ADDSD ;
|
||||
M: x86 %sub-float double-rep two-operand SUBSD ;
|
||||
|
|
@ -569,20 +586,20 @@ M: x86 %float>integer CVTTSD2SI ;
|
|||
|
||||
:: (%compare-float) ( dst src1 src2 cc temp compare -- )
|
||||
cc {
|
||||
{ cc< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] }
|
||||
{ cc<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
|
||||
{ cc> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] }
|
||||
{ cc>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] }
|
||||
{ cc= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= %boolean ] }
|
||||
{ cc<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE %boolean ] }
|
||||
{ cc<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP %boolean ] }
|
||||
{ cc/< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
|
||||
{ cc/<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] }
|
||||
{ cc/> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] }
|
||||
{ cc/>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] }
|
||||
{ cc/= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= %boolean ] }
|
||||
{ cc/<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE %boolean ] }
|
||||
{ cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP %boolean ] }
|
||||
{ cc< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA (%boolean) ] }
|
||||
{ cc<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE (%boolean) ] }
|
||||
{ cc> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA (%boolean) ] }
|
||||
{ cc>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE (%boolean) ] }
|
||||
{ cc= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= (%boolean) ] }
|
||||
{ cc<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE (%boolean) ] }
|
||||
{ cc<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP (%boolean) ] }
|
||||
{ cc/< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE (%boolean) ] }
|
||||
{ cc/<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB (%boolean) ] }
|
||||
{ cc/> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE (%boolean) ] }
|
||||
{ cc/>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB (%boolean) ] }
|
||||
{ cc/= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= (%boolean) ] }
|
||||
{ cc/<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE (%boolean) ] }
|
||||
{ cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP (%boolean) ] }
|
||||
} case ; inline
|
||||
|
||||
M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
|
||||
|
|
@ -954,10 +971,10 @@ M: x86 %compare-vector-ccs
|
|||
|
||||
:: %test-vector-mask ( dst temp mask vcc -- )
|
||||
vcc {
|
||||
{ vcc-any [ dst dst TEST dst temp \ CMOVNE %boolean ] }
|
||||
{ vcc-none [ dst dst TEST dst temp \ CMOVE %boolean ] }
|
||||
{ vcc-all [ dst mask CMP dst temp \ CMOVE %boolean ] }
|
||||
{ vcc-notall [ dst mask CMP dst temp \ CMOVNE %boolean ] }
|
||||
{ vcc-any [ dst dst TEST dst temp \ CMOVNE (%boolean) ] }
|
||||
{ vcc-none [ dst dst TEST dst temp \ CMOVE (%boolean) ] }
|
||||
{ vcc-all [ dst mask CMP dst temp \ CMOVE (%boolean) ] }
|
||||
{ vcc-notall [ dst mask CMP dst temp \ CMOVNE (%boolean) ] }
|
||||
} case ;
|
||||
|
||||
: %move-vector-mask ( dst src rep -- mask )
|
||||
|
|
|
|||
|
|
@ -2,10 +2,10 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs byte-arrays byte-vectors classes
|
||||
combinators definitions effects fry generic generic.single
|
||||
generic.standard hashtables io.binary io.streams.string kernel
|
||||
kernel.private math math.integers.private math.parser
|
||||
namespaces parser sbufs sequences splitting splitting.private strings
|
||||
vectors words ;
|
||||
generic.standard hashtables io.binary io.encodings
|
||||
io.streams.string kernel kernel.private math
|
||||
math.integers.private math.parser namespaces parser sbufs
|
||||
sequences splitting splitting.private strings vectors words ;
|
||||
IN: hints
|
||||
|
||||
GENERIC: specializer-predicate ( spec -- quot )
|
||||
|
|
@ -131,3 +131,5 @@ M\ hashtable at* { { fixnum object } { word object } } "specializer" set-word-pr
|
|||
M\ hashtable set-at { { object fixnum object } { object word object } } "specializer" set-word-prop
|
||||
|
||||
\ bignum/f { { bignum bignum } { bignum fixnum } { fixnum bignum } { fixnum fixnum } } "specializer" set-word-prop
|
||||
|
||||
\ encode-string { string object object } "specializer" set-word-prop
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: math.parser arrays io.encodings sequences kernel assocs
|
||||
hashtables io.encodings.ascii generic parser classes.tuple words
|
||||
words.symbol io io.files splitting namespaces math
|
||||
compiler.units accessors classes.singleton classes.mixin
|
||||
io.encodings.iana fry simple-flat-file lexer ;
|
||||
USING: arrays assocs biassocs kernel io.encodings math.parser
|
||||
sequences hashtables io.encodings.ascii generic parser
|
||||
classes.tuple words words.symbol io io.files splitting
|
||||
namespaces math compiler.units accessors classes.singleton
|
||||
classes.mixin io.encodings.iana fry simple-flat-file lexer ;
|
||||
IN: io.encodings.8-bit
|
||||
|
||||
<PRIVATE
|
||||
|
|
@ -15,20 +15,22 @@ IN: io.encodings.8-bit
|
|||
SYMBOL: 8-bit-encodings
|
||||
8-bit-encodings [ H{ } clone ] initialize
|
||||
|
||||
TUPLE: 8-bit biassoc ;
|
||||
TUPLE: 8-bit { biassoc biassoc read-only } ;
|
||||
|
||||
: encode-8-bit ( char stream assoc -- )
|
||||
swapd value-at
|
||||
[ swap stream-write1 ] [ encode-error ] if* ; inline
|
||||
: 8-bit-encode ( char 8-bit -- byte )
|
||||
biassoc>> value-at [ encode-error ] unless* ; inline
|
||||
|
||||
M: 8-bit encode-char biassoc>> encode-8-bit ;
|
||||
M: 8-bit encode-char
|
||||
swap [ 8-bit-encode ] dip stream-write1 ;
|
||||
|
||||
: decode-8-bit ( stream assoc -- char/f )
|
||||
swap stream-read1
|
||||
[ swap at [ replacement-char ] unless* ]
|
||||
[ drop f ] if* ; inline
|
||||
M: 8-bit encode-string
|
||||
swap [ '[ _ 8-bit-encode ] B{ } map-as ] dip stream-write ;
|
||||
|
||||
M: 8-bit decode-char biassoc>> decode-8-bit ;
|
||||
M: 8-bit decode-char
|
||||
swap stream-read1 dup
|
||||
[ swap biassoc>> at [ replacement-char ] unless* ]
|
||||
[ 2drop f ]
|
||||
if ;
|
||||
|
||||
MIXIN: 8-bit-encoding
|
||||
|
||||
|
|
|
|||
|
|
@ -1,22 +1,27 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io io.encodings kernel math io.encodings.private ;
|
||||
USING: accessors byte-arrays io io.encodings
|
||||
io.encodings.private kernel math sequences ;
|
||||
IN: io.encodings.ascii
|
||||
|
||||
<PRIVATE
|
||||
: encode-if< ( char stream encoding max -- )
|
||||
nip 1 - pick < [ encode-error ] [ stream-write1 ] if ; inline
|
||||
|
||||
: decode-if< ( stream encoding max -- character )
|
||||
nip swap stream-read1 dup
|
||||
[ [ nip ] [ > ] 2bi [ >fixnum ] [ drop replacement-char ] if ]
|
||||
[ 2drop f ] if ; inline
|
||||
PRIVATE>
|
||||
|
||||
SINGLETON: ascii
|
||||
|
||||
M: ascii encode-char
|
||||
128 encode-if< ; inline
|
||||
drop
|
||||
over 127 <= [ stream-write1 ] [ encode-error ] if ; inline
|
||||
|
||||
M: ascii encode-string
|
||||
drop
|
||||
[
|
||||
dup aux>>
|
||||
[ [ dup 127 <= [ encode-error ] unless ] B{ } map-as ]
|
||||
[ >byte-array ]
|
||||
if
|
||||
] dip
|
||||
stream-write ;
|
||||
|
||||
M: ascii decode-char
|
||||
128 decode-if< ; inline
|
||||
drop
|
||||
stream-read1 dup [
|
||||
dup 127 <= [ >fixnum ] [ drop replacement-char ] if
|
||||
] when ; inline
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ M: output-port stream-write1
|
|||
|
||||
: write-in-groups ( byte-array port -- )
|
||||
[ binary-object <direct-uchar-array> ] dip
|
||||
[ buffer>> size>> <groups> ] [ '[ _ stream-write ] ] bi
|
||||
[ buffer>> size>> <sliced-groups> ] [ '[ _ stream-write ] ] bi
|
||||
each ;
|
||||
|
||||
M: output-port stream-write
|
||||
|
|
@ -198,5 +198,3 @@ io.encodings.private ;
|
|||
HINTS: decoder-read-until { string input-port utf8 } { string input-port ascii } ;
|
||||
|
||||
HINTS: decoder-readln { input-port utf8 } { input-port ascii } ;
|
||||
|
||||
HINTS: encoder-write { object output-port utf8 } { object output-port ascii } ;
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
Slava Pestov
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
! Copyright (C) 2010 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien byte-vectors io kernel libc math sequences ;
|
||||
IN: io.streams.byte-array.fast
|
||||
|
||||
! This is split off from io.streams.byte-array because it uses
|
||||
! memcpy, which is a non-core word that only works after the
|
||||
! optimizing compiler has been loaded.
|
||||
|
||||
M: byte-vector stream-write
|
||||
[ dup byte-length tail-slice ]
|
||||
[ [ [ byte-length ] bi@ + ] keep lengthen ]
|
||||
[ drop byte-length ]
|
||||
2tri
|
||||
[ >c-ptr swap >c-ptr ] dip memcpy ;
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! Copyright (C) 2008, 2010 Daniel Ehrenberg, Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: math kernel sequences sbufs vectors namespaces growable
|
||||
strings io classes continuations destructors combinators
|
||||
|
|
@ -12,6 +12,10 @@ GENERIC: decode-char ( stream encoding -- char/f )
|
|||
|
||||
GENERIC: encode-char ( char stream encoding -- )
|
||||
|
||||
GENERIC: encode-string ( string stream encoding -- )
|
||||
|
||||
M: object encode-string [ encode-char ] 2curry each ; inline
|
||||
|
||||
GENERIC: <decoder> ( stream encoding -- newstream )
|
||||
|
||||
CONSTANT: replacement-char HEX: fffd
|
||||
|
|
@ -134,13 +138,8 @@ M: encoder stream-element-type
|
|||
M: encoder stream-write1
|
||||
>encoder< encode-char ;
|
||||
|
||||
GENERIC# encoder-write 2 ( string stream encoding -- )
|
||||
|
||||
M: string encoder-write
|
||||
[ encode-char ] 2curry each ;
|
||||
|
||||
M: encoder stream-write
|
||||
>encoder< encoder-write ;
|
||||
>encoder< encode-string ;
|
||||
|
||||
M: encoder dispose stream>> dispose ;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: math math.order kernel sequences sbufs vectors growable io
|
||||
continuations namespaces io.encodings combinators strings ;
|
||||
USING: accessors byte-arrays math math.order kernel sequences
|
||||
sbufs vectors growable io continuations namespaces io.encodings
|
||||
combinators strings ;
|
||||
IN: io.encodings.utf8
|
||||
|
||||
! Decoding UTF-8
|
||||
|
|
@ -45,10 +46,10 @@ M: utf8 decode-char
|
|||
! Encoding UTF-8
|
||||
|
||||
: encoded ( stream char -- )
|
||||
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ;
|
||||
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline
|
||||
|
||||
: char>utf8 ( stream char -- )
|
||||
{
|
||||
: char>utf8 ( char stream -- )
|
||||
swap {
|
||||
{ [ dup -7 shift zero? ] [ swap stream-write1 ] }
|
||||
{ [ dup -11 shift zero? ] [
|
||||
2dup -6 shift BIN: 11000000 bitor swap stream-write1
|
||||
|
|
@ -65,10 +66,16 @@ M: utf8 decode-char
|
|||
2dup -6 shift encoded
|
||||
encoded
|
||||
]
|
||||
} cond ;
|
||||
} cond ; inline
|
||||
|
||||
M: utf8 encode-char
|
||||
drop swap char>utf8 ;
|
||||
drop char>utf8 ;
|
||||
|
||||
M: utf8 encode-string
|
||||
drop
|
||||
over aux>>
|
||||
[ [ char>utf8 ] curry each ]
|
||||
[ [ >byte-array ] dip stream-write ] if ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
! Copyright (C) 2003, 2008 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel math.private sequences kernel.private
|
||||
math sequences.private slots.private byte-arrays
|
||||
alien.accessors ;
|
||||
math sequences.private slots.private alien.accessors ;
|
||||
IN: strings
|
||||
|
||||
<PRIVATE
|
||||
|
|
|
|||
|
|
@ -91,10 +91,13 @@ TYPED:: make-repeat-fasta ( k: fixnum len: fixnum alu: string -- k': fixnum )
|
|||
n 2 * ALU "Homo sapiens alu" "ONE" write-repeat-fasta
|
||||
|
||||
initial-seed
|
||||
|
||||
n 3 * homo-sapiens-chars homo-sapiens-floats
|
||||
"IUB ambiguity codes" "TWO" write-random-fasta
|
||||
|
||||
n 5 * IUB-chars IUB-floats
|
||||
"Homo sapiens frequency" "THREE" write-random-fasta
|
||||
|
||||
drop
|
||||
] with-file-writer
|
||||
] ;
|
||||
|
|
|
|||
|
|
@ -1,308 +1,83 @@
|
|||
! Copyright (C) 2010 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien alien.c-types alien.data alien.parser
|
||||
alien.strings arrays assocs byte-arrays classes.struct
|
||||
combinators continuations cuda.ffi destructors fry io
|
||||
io.backend io.encodings.string io.encodings.utf8 kernel lexer
|
||||
locals math math.parser namespaces opengl.gl.extensions
|
||||
prettyprint quotations sequences ;
|
||||
USING: accessors alien alien.data alien.parser alien.strings
|
||||
alien.syntax arrays assocs byte-arrays classes.struct
|
||||
combinators continuations cuda.ffi cuda.memory cuda.utils
|
||||
destructors fry io io.backend io.encodings.string
|
||||
io.encodings.utf8 kernel lexer locals macros math math.parser
|
||||
namespaces nested-comments opengl.gl.extensions parser
|
||||
prettyprint quotations sequences words ;
|
||||
QUALIFIED-WITH: alien.c-types a
|
||||
IN: cuda
|
||||
|
||||
SYMBOL: cuda-device
|
||||
SYMBOL: cuda-context
|
||||
SYMBOL: cuda-module
|
||||
SYMBOL: cuda-function
|
||||
SYMBOL: cuda-launcher
|
||||
SYMBOL: cuda-memory-hashtable
|
||||
|
||||
ERROR: throw-cuda-error n ;
|
||||
|
||||
: cuda-error ( n -- )
|
||||
dup CUDA_SUCCESS = [ drop ] [ throw-cuda-error ] if ;
|
||||
|
||||
: cuda-version ( -- n )
|
||||
int <c-object> [ cuDriverGetVersion cuda-error ] keep *int ;
|
||||
|
||||
: init-cuda ( -- )
|
||||
0 cuInit cuda-error ;
|
||||
|
||||
TUPLE: launcher
|
||||
{ device integer initial: 0 }
|
||||
{ device-flags initial: 0 }
|
||||
path block-shape shared-size grid ;
|
||||
{ device-flags initial: 0 } ;
|
||||
|
||||
TUPLE: function-launcher
|
||||
dim-block dim-grid shared-size stream ;
|
||||
|
||||
: with-cuda-context ( flags device quot -- )
|
||||
[
|
||||
[ CUcontext <c-object> ] 2dip
|
||||
[ cuCtxCreate cuda-error ] 3keep 2drop *void*
|
||||
] dip
|
||||
H{ } clone cuda-modules set-global
|
||||
H{ } clone cuda-functions set
|
||||
[ create-context ] dip
|
||||
[ '[ _ @ ] ]
|
||||
[ drop '[ _ cuCtxDestroy cuda-error ] ] 2bi
|
||||
[ drop '[ _ destroy-context ] ] 2bi
|
||||
[ ] cleanup ; inline
|
||||
|
||||
: with-cuda-module ( path quot -- )
|
||||
[
|
||||
normalize-path
|
||||
[ CUmodule <c-object> ] dip
|
||||
[ cuModuleLoad cuda-error ] 2keep drop *void*
|
||||
] dip
|
||||
[ '[ _ @ ] ]
|
||||
[ drop '[ _ cuModuleUnload cuda-error ] ] 2bi
|
||||
[ ] cleanup ; inline
|
||||
|
||||
: with-cuda-program ( flags device path quot -- )
|
||||
: with-cuda-program ( flags device quot -- )
|
||||
[ dup cuda-device set ] 2dip
|
||||
'[
|
||||
cuda-context set
|
||||
_ [
|
||||
cuda-module set
|
||||
_ call
|
||||
] with-cuda-module
|
||||
] with-cuda-context ; inline
|
||||
'[ cuda-context set _ call ] with-cuda-context ; inline
|
||||
|
||||
: with-cuda ( launcher quot -- )
|
||||
[
|
||||
init-cuda
|
||||
H{ } clone cuda-memory-hashtable
|
||||
] 2dip '[
|
||||
init-cuda
|
||||
[ H{ } clone cuda-memory-hashtable ] 2dip '[
|
||||
_
|
||||
[ cuda-launcher set ]
|
||||
[ [ device>> ] [ device-flags>> ] [ path>> ] tri ] bi
|
||||
[ [ device>> ] [ device-flags>> ] bi ] bi
|
||||
_ with-cuda-program
|
||||
] with-variable ; inline
|
||||
|
||||
<PRIVATE
|
||||
: c-type>cuda-setter ( c-type -- n cuda-type )
|
||||
{
|
||||
{ [ dup a:int = ] [ drop 4 [ cuda-int* ] ] }
|
||||
{ [ dup a:uint = ] [ drop 4 [ cuda-int* ] ] }
|
||||
{ [ dup a:float = ] [ drop 4 [ cuda-float* ] ] }
|
||||
{ [ dup a:pointer? ] [ drop 4 [ ptr>> cuda-int* ] ] }
|
||||
{ [ dup a:void* = ] [ drop 4 [ ptr>> cuda-int* ] ] }
|
||||
} cond ;
|
||||
|
||||
: #cuda-devices ( -- n )
|
||||
int <c-object> [ cuDeviceGetCount cuda-error ] keep *int ;
|
||||
|
||||
: n>cuda-device ( n -- device )
|
||||
[ CUdevice <c-object> ] dip [ cuDeviceGet cuda-error ] 2keep drop *int ;
|
||||
|
||||
: enumerate-cuda-devices ( -- devices )
|
||||
#cuda-devices iota [ n>cuda-device ] map ;
|
||||
|
||||
: cuda-device-properties ( device -- properties )
|
||||
[ CUdevprop <c-object> ] dip
|
||||
[ cuDeviceGetProperties cuda-error ] 2keep drop
|
||||
CUdevprop memory>struct ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: cuda-devices ( -- assoc )
|
||||
enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ;
|
||||
|
||||
: cuda-device-name ( n -- string )
|
||||
[ 256 [ <byte-array> ] keep ] dip
|
||||
[ cuDeviceGetName cuda-error ]
|
||||
[ 2drop utf8 alien>string ] 3bi ;
|
||||
|
||||
: cuda-device-capability ( n -- pair )
|
||||
[ int <c-object> int <c-object> ] dip
|
||||
[ cuDeviceComputeCapability cuda-error ]
|
||||
[ drop [ *int ] bi@ ] 3bi 2array ;
|
||||
|
||||
: cuda-device-memory ( n -- bytes )
|
||||
[ uint <c-object> ] dip
|
||||
[ cuDeviceTotalMem cuda-error ]
|
||||
[ drop *uint ] 2bi ;
|
||||
|
||||
: get-cuda-function* ( module string -- function )
|
||||
[ CUfunction <c-object> ] 2dip
|
||||
[ cuModuleGetFunction cuda-error ] 3keep 2drop *void* ;
|
||||
|
||||
: get-cuda-function ( string -- function )
|
||||
[ cuda-module get ] dip get-cuda-function* ;
|
||||
|
||||
: with-cuda-function ( string quot -- )
|
||||
[
|
||||
get-cuda-function cuda-function set
|
||||
] dip call ; inline
|
||||
|
||||
: launch-function* ( function -- ) cuLaunch cuda-error ;
|
||||
|
||||
: launch-function ( -- ) cuda-function get cuLaunch cuda-error ;
|
||||
|
||||
: launch-function-grid* ( function width height -- )
|
||||
cuLaunchGrid cuda-error ;
|
||||
|
||||
: launch-function-grid ( width height -- )
|
||||
[ cuda-function get ] 2dip
|
||||
cuLaunchGrid cuda-error ;
|
||||
|
||||
TUPLE: cuda-memory < disposable ptr length ;
|
||||
|
||||
: <cuda-memory> ( ptr length -- obj )
|
||||
cuda-memory new-disposable
|
||||
swap >>length
|
||||
swap >>ptr ;
|
||||
|
||||
: add-cuda-memory ( obj -- obj )
|
||||
dup dup ptr>> cuda-memory-hashtable get set-at ;
|
||||
|
||||
: delete-cuda-memory ( obj -- )
|
||||
cuda-memory-hashtable delete-at ;
|
||||
|
||||
ERROR: invalid-cuda-memory ptr ;
|
||||
|
||||
: cuda-memory-length ( cuda-memory -- n )
|
||||
ptr>> cuda-memory-hashtable get ?at [
|
||||
length>>
|
||||
] [
|
||||
invalid-cuda-memory
|
||||
] if ;
|
||||
|
||||
M: cuda-memory byte-length length>> ;
|
||||
|
||||
: cuda-malloc ( n -- ptr )
|
||||
[ CUdeviceptr <c-object> ] dip
|
||||
[ cuMemAlloc cuda-error ] 2keep
|
||||
[ *int ] dip <cuda-memory> add-cuda-memory ;
|
||||
|
||||
: cuda-free* ( ptr -- )
|
||||
cuMemFree cuda-error ;
|
||||
|
||||
M: cuda-memory dispose ( ptr -- )
|
||||
ptr>> cuda-free* ;
|
||||
|
||||
: host>device ( dest-ptr src-ptr -- )
|
||||
[ ptr>> ] dip dup length cuMemcpyHtoD cuda-error ;
|
||||
|
||||
:: device>host ( ptr -- seq )
|
||||
ptr byte-length <byte-array>
|
||||
[ ptr [ ptr>> ] [ byte-length ] bi cuMemcpyDtoH cuda-error ] keep ;
|
||||
|
||||
: memcpy-device>device ( dest-ptr src-ptr count -- )
|
||||
cuMemcpyDtoD cuda-error ;
|
||||
|
||||
: memcpy-device>array ( dest-array dest-index src-ptr count -- )
|
||||
cuMemcpyDtoA cuda-error ;
|
||||
|
||||
: memcpy-array>device ( dest-ptr src-array src-index count -- )
|
||||
cuMemcpyAtoD cuda-error ;
|
||||
|
||||
: memcpy-array>host ( dest-ptr src-array src-index count -- )
|
||||
cuMemcpyAtoH cuda-error ;
|
||||
|
||||
: memcpy-host>array ( dest-array dest-index src-ptr count -- )
|
||||
cuMemcpyHtoA cuda-error ;
|
||||
|
||||
: memcpy-array>array ( dest-array dest-index src-array src-ptr count -- )
|
||||
cuMemcpyAtoA cuda-error ;
|
||||
|
||||
: cuda-int* ( function offset value -- )
|
||||
cuParamSeti cuda-error ;
|
||||
|
||||
: cuda-int ( offset value -- )
|
||||
[ cuda-function get ] 2dip cuda-int* ;
|
||||
|
||||
: cuda-float* ( function offset value -- )
|
||||
cuParamSetf cuda-error ;
|
||||
|
||||
: cuda-float ( offset value -- )
|
||||
[ cuda-function get ] 2dip cuda-float* ;
|
||||
|
||||
: cuda-vector* ( function offset ptr n -- )
|
||||
cuParamSetv cuda-error ;
|
||||
|
||||
: cuda-vector ( offset ptr n -- )
|
||||
[ cuda-function get ] 3dip cuda-vector* ;
|
||||
|
||||
: param-size* ( function n -- )
|
||||
cuParamSetSize cuda-error ;
|
||||
|
||||
: param-size ( n -- )
|
||||
[ cuda-function get ] dip param-size* ;
|
||||
|
||||
: malloc-device-string ( string -- n )
|
||||
utf8 encode
|
||||
[ length cuda-malloc ] keep
|
||||
[ host>device ] [ drop ] 2bi ;
|
||||
|
||||
ERROR: bad-cuda-parameter parameter ;
|
||||
|
||||
:: set-parameters ( seq -- )
|
||||
cuda-function get :> function
|
||||
0 :> offset!
|
||||
seq [
|
||||
[ offset ] dip
|
||||
{
|
||||
{ [ dup cuda-memory? ] [ ptr>> cuda-int ] }
|
||||
{ [ dup float? ] [ cuda-float ] }
|
||||
{ [ dup integer? ] [ cuda-int ] }
|
||||
[ bad-cuda-parameter ]
|
||||
} cond
|
||||
offset 4 + offset!
|
||||
] each
|
||||
offset param-size ;
|
||||
|
||||
: cuda-device-attribute ( attribute dev -- n )
|
||||
[ int <c-object> ] 2dip
|
||||
[ cuDeviceGetAttribute cuda-error ]
|
||||
[ 2drop *int ] 3bi ;
|
||||
|
||||
: function-block-shape* ( function x y z -- )
|
||||
cuFuncSetBlockShape cuda-error ;
|
||||
|
||||
: function-block-shape ( x y z -- )
|
||||
[ cuda-function get ] 3dip
|
||||
cuFuncSetBlockShape cuda-error ;
|
||||
|
||||
: function-shared-size* ( function n -- )
|
||||
cuFuncSetSharedSize cuda-error ;
|
||||
|
||||
: function-shared-size ( n -- )
|
||||
[ cuda-function get ] dip
|
||||
cuFuncSetSharedSize cuda-error ;
|
||||
|
||||
: launch ( -- )
|
||||
cuda-launcher get {
|
||||
[ block-shape>> first3 function-block-shape ]
|
||||
[ shared-size>> function-shared-size ]
|
||||
: run-function-launcher ( function-launcher function -- )
|
||||
swap
|
||||
{
|
||||
[ dim-block>> first3 function-block-shape* ]
|
||||
[ shared-size>> function-shared-size* ]
|
||||
[
|
||||
grid>> [
|
||||
launch-function
|
||||
dim-grid>> [
|
||||
launch-function*
|
||||
] [
|
||||
first2 launch-function-grid
|
||||
first2 launch-function-grid*
|
||||
] if-empty
|
||||
]
|
||||
} cleave ;
|
||||
} 2cleave ;
|
||||
|
||||
: cuda-device. ( n -- )
|
||||
{
|
||||
[ "Device: " write number>string print ]
|
||||
[ "Name: " write cuda-device-name print ]
|
||||
[ "Memory: " write cuda-device-memory number>string print ]
|
||||
[
|
||||
"Capability: " write
|
||||
cuda-device-capability [ number>string ] map " " join print
|
||||
: cuda-argument-setter ( offset c-type -- offset' quot )
|
||||
c-type>cuda-setter
|
||||
[ over [ + ] dip ] dip
|
||||
'[ swap _ swap _ call ] ;
|
||||
|
||||
MACRO: cuda-arguments ( c-types -- quot: ( args... function -- ) )
|
||||
[ 0 ] dip [ cuda-argument-setter ] map reverse
|
||||
swap '[ _ param-size* ] suffix
|
||||
'[ _ cleave ] ;
|
||||
|
||||
: define-cuda-word ( word module-name function-name arguments -- )
|
||||
[
|
||||
'[
|
||||
_ _ cached-function
|
||||
[ nip _ cuda-arguments ]
|
||||
[ run-function-launcher ] 2bi
|
||||
]
|
||||
[ "Properties: " write cuda-device-properties . ]
|
||||
[
|
||||
"CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: " write
|
||||
CU_DEVICE_ATTRIBUTE_GPU_OVERLAP swap
|
||||
cuda-device-attribute number>string print
|
||||
]
|
||||
} cleave ;
|
||||
|
||||
: cuda. ( -- )
|
||||
"CUDA Version: " write cuda-version number>string print nl
|
||||
#cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
|
||||
|
||||
|
||||
: test-cuda0 ( -- )
|
||||
T{ launcher
|
||||
{ path "vocab:cuda/hello.ptx" }
|
||||
{ block-shape { 6 6 6 } }
|
||||
{ shared-size 2 }
|
||||
{ grid { 2 6 } }
|
||||
} [
|
||||
"helloWorld" [
|
||||
"Hello World!" [ - ] map-index
|
||||
malloc-device-string &dispose
|
||||
|
||||
[ 1array set-parameters ]
|
||||
[ drop launch ]
|
||||
[ device>host utf8 alien>string . ] tri
|
||||
] with-cuda-function
|
||||
] with-cuda ;
|
||||
]
|
||||
[ 2nip \ function-launcher suffix a:void function-effect ]
|
||||
3bi define-declared ;
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
Doug Coleman
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
! Copyright (C) 2010 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien.c-types alien.strings cuda cuda.memory cuda.syntax
|
||||
destructors io io.encodings.utf8 kernel locals math sequences ;
|
||||
IN: cuda.demos.hello-world
|
||||
|
||||
CUDA-LIBRARY: hello vocab:cuda/demos/hello-world/hello.ptx
|
||||
|
||||
CUDA-FUNCTION: helloWorld ( char* string-ptr ) ;
|
||||
|
||||
:: cuda-hello-world ( -- )
|
||||
T{ launcher { device 0 } } [
|
||||
"Hello World!" [ - ] map-index malloc-device-string
|
||||
&dispose dup :> str
|
||||
|
||||
{ 6 1 1 } { 2 1 } 1 3<<< helloWorld
|
||||
|
||||
str device>host utf8 alien>string print
|
||||
] with-cuda ;
|
||||
|
||||
MAIN: cuda-hello-world
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
Doug Coleman
|
||||
Joe Groff
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
! Copyright (C) 2010 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien.c-types cuda cuda.syntax locals ;
|
||||
IN: cuda.demos.prefix-sum
|
||||
|
||||
CUDA-LIBRARY: prefix-sum vocab:cuda/demos/prefix-sum/prefix-sum.ptx
|
||||
|
||||
CUDA-FUNCTION: prefix_sum_block ( uint* in, uint* out, uint n ) ;
|
||||
|
||||
:: cuda-prefix-sum ( -- )
|
||||
T{ launcher { device 0 } }
|
||||
[
|
||||
! { 1 1 1 } { 2 1 } 0 3<<< prefix_sum_block
|
||||
] with-cuda ;
|
||||
|
||||
MAIN: cuda-prefix-sum
|
||||
|
|
@ -0,0 +1 @@
|
|||
Doug Coleman
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
! Copyright (C) 2010 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien.c-types alien.data alien.strings arrays assocs
|
||||
byte-arrays classes.struct combinators cuda.ffi cuda.utils io
|
||||
io.encodings.utf8 kernel math.parser prettyprint sequences ;
|
||||
IN: cuda.devices
|
||||
|
||||
: #cuda-devices ( -- n )
|
||||
int <c-object> [ cuDeviceGetCount cuda-error ] keep *int ;
|
||||
|
||||
: n>cuda-device ( n -- device )
|
||||
[ CUdevice <c-object> ] dip [ cuDeviceGet cuda-error ] 2keep drop *int ;
|
||||
|
||||
: enumerate-cuda-devices ( -- devices )
|
||||
#cuda-devices iota [ n>cuda-device ] map ;
|
||||
|
||||
: cuda-device-properties ( device -- properties )
|
||||
[ CUdevprop <c-object> ] dip
|
||||
[ cuDeviceGetProperties cuda-error ] 2keep drop
|
||||
CUdevprop memory>struct ;
|
||||
|
||||
: cuda-devices ( -- assoc )
|
||||
enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ;
|
||||
|
||||
: cuda-device-name ( n -- string )
|
||||
[ 256 [ <byte-array> ] keep ] dip
|
||||
[ cuDeviceGetName cuda-error ]
|
||||
[ 2drop utf8 alien>string ] 3bi ;
|
||||
|
||||
: cuda-device-capability ( n -- pair )
|
||||
[ int <c-object> int <c-object> ] dip
|
||||
[ cuDeviceComputeCapability cuda-error ]
|
||||
[ drop [ *int ] bi@ ] 3bi 2array ;
|
||||
|
||||
: cuda-device-memory ( n -- bytes )
|
||||
[ uint <c-object> ] dip
|
||||
[ cuDeviceTotalMem cuda-error ]
|
||||
[ drop *uint ] 2bi ;
|
||||
|
||||
: cuda-device-attribute ( attribute dev -- n )
|
||||
[ int <c-object> ] 2dip
|
||||
[ cuDeviceGetAttribute cuda-error ]
|
||||
[ 2drop *int ] 3bi ;
|
||||
|
||||
: cuda-device. ( n -- )
|
||||
{
|
||||
[ "Device: " write number>string print ]
|
||||
[ "Name: " write cuda-device-name print ]
|
||||
[ "Memory: " write cuda-device-memory number>string print ]
|
||||
[
|
||||
"Capability: " write
|
||||
cuda-device-capability [ number>string ] map " " join print
|
||||
]
|
||||
[ "Properties: " write cuda-device-properties . ]
|
||||
[
|
||||
"CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: " write
|
||||
CU_DEVICE_ATTRIBUTE_GPU_OVERLAP swap
|
||||
cuda-device-attribute number>string print
|
||||
]
|
||||
} cleave ;
|
||||
|
||||
: cuda. ( -- )
|
||||
"CUDA Version: " write cuda-version number>string print nl
|
||||
#cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
|
||||
|
||||
|
|
@ -0,0 +1 @@
|
|||
Doug Coleman
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
! Copyright (C) 2010 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien alien.data assocs byte-arrays cuda.ffi
|
||||
cuda.utils destructors io.encodings.string io.encodings.utf8
|
||||
kernel locals namespaces sequences ;
|
||||
QUALIFIED-WITH: alien.c-types a
|
||||
IN: cuda.memory
|
||||
|
||||
SYMBOL: cuda-memory-hashtable
|
||||
|
||||
TUPLE: cuda-memory < disposable ptr length ;
|
||||
|
||||
: <cuda-memory> ( ptr length -- obj )
|
||||
cuda-memory new-disposable
|
||||
swap >>length
|
||||
swap >>ptr ;
|
||||
|
||||
: add-cuda-memory ( obj -- obj )
|
||||
dup dup ptr>> cuda-memory-hashtable get set-at ;
|
||||
|
||||
: delete-cuda-memory ( obj -- )
|
||||
cuda-memory-hashtable delete-at ;
|
||||
|
||||
ERROR: invalid-cuda-memory ptr ;
|
||||
|
||||
: cuda-memory-length ( cuda-memory -- n )
|
||||
ptr>> cuda-memory-hashtable get ?at [
|
||||
length>>
|
||||
] [
|
||||
invalid-cuda-memory
|
||||
] if ;
|
||||
|
||||
M: cuda-memory byte-length length>> ;
|
||||
|
||||
: cuda-malloc ( n -- ptr )
|
||||
[ CUdeviceptr <c-object> ] dip
|
||||
[ cuMemAlloc cuda-error ] 2keep
|
||||
[ a:*int ] dip <cuda-memory> add-cuda-memory ;
|
||||
|
||||
: cuda-free* ( ptr -- )
|
||||
cuMemFree cuda-error ;
|
||||
|
||||
M: cuda-memory dispose ( ptr -- )
|
||||
ptr>> cuda-free* ;
|
||||
|
||||
: memcpy-device>device ( dest-ptr src-ptr count -- )
|
||||
cuMemcpyDtoD cuda-error ;
|
||||
|
||||
: memcpy-device>array ( dest-array dest-index src-ptr count -- )
|
||||
cuMemcpyDtoA cuda-error ;
|
||||
|
||||
: memcpy-array>device ( dest-ptr src-array src-index count -- )
|
||||
cuMemcpyAtoD cuda-error ;
|
||||
|
||||
: memcpy-array>host ( dest-ptr src-array src-index count -- )
|
||||
cuMemcpyAtoH cuda-error ;
|
||||
|
||||
: memcpy-host>array ( dest-array dest-index src-ptr count -- )
|
||||
cuMemcpyHtoA cuda-error ;
|
||||
|
||||
: memcpy-array>array ( dest-array dest-index src-array src-ptr count -- )
|
||||
cuMemcpyAtoA cuda-error ;
|
||||
|
||||
: host>device ( dest-ptr src-ptr -- )
|
||||
[ ptr>> ] dip dup length cuMemcpyHtoD cuda-error ;
|
||||
|
||||
:: device>host ( ptr -- seq )
|
||||
ptr byte-length <byte-array>
|
||||
[ ptr [ ptr>> ] [ byte-length ] bi cuMemcpyDtoH cuda-error ] keep ;
|
||||
|
||||
: malloc-device-string ( string -- n )
|
||||
utf8 encode
|
||||
[ length cuda-malloc ] keep
|
||||
[ host>device ] [ drop ] 2bi ;
|
||||
|
|
@ -0,0 +1 @@
|
|||
Doug Coleman
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
! Copyright (C) 2010 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien.parser cuda cuda.utils io.backend kernel lexer
|
||||
namespaces parser ;
|
||||
IN: cuda.syntax
|
||||
|
||||
SYNTAX: CUDA-LIBRARY:
|
||||
scan scan normalize-path
|
||||
[ add-cuda-library ]
|
||||
[ drop current-cuda-library set-global ] 2bi ;
|
||||
|
||||
SYNTAX: CUDA-FUNCTION:
|
||||
scan [ create-in current-cuda-library get ] [ ] bi
|
||||
";" scan-c-args drop define-cuda-word ;
|
||||
|
||||
: 3<<< ( dim-block dim-grid shared-size -- function-launcher )
|
||||
f function-launcher boa ;
|
||||
|
||||
: 4<<< ( dim-block dim-grid shared-size stream -- function-launcher )
|
||||
function-launcher boa ;
|
||||
|
|
@ -0,0 +1 @@
|
|||
Doug Coleman
|
||||
|
|
@ -0,0 +1,143 @@
|
|||
! Copyright (C) 2010 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien.c-types alien.data alien.strings arrays
|
||||
assocs byte-arrays classes.struct combinators cuda.ffi io
|
||||
io.backend io.encodings.utf8 kernel math.parser namespaces
|
||||
prettyprint sequences ;
|
||||
IN: cuda.utils
|
||||
|
||||
SYMBOL: cuda-device
|
||||
SYMBOL: cuda-context
|
||||
SYMBOL: cuda-module
|
||||
SYMBOL: cuda-function
|
||||
SYMBOL: cuda-launcher
|
||||
|
||||
SYMBOL: cuda-modules
|
||||
SYMBOL: cuda-functions
|
||||
|
||||
ERROR: throw-cuda-error n ;
|
||||
|
||||
: cuda-error ( n -- )
|
||||
dup CUDA_SUCCESS = [ drop ] [ throw-cuda-error ] if ;
|
||||
|
||||
: init-cuda ( -- )
|
||||
0 cuInit cuda-error ;
|
||||
|
||||
: cuda-version ( -- n )
|
||||
int <c-object> [ cuDriverGetVersion cuda-error ] keep *int ;
|
||||
|
||||
: get-function-ptr* ( module string -- function )
|
||||
[ CUfunction <c-object> ] 2dip
|
||||
[ cuModuleGetFunction cuda-error ] 3keep 2drop *void* ;
|
||||
|
||||
: get-function-ptr ( string -- function )
|
||||
[ cuda-module get ] dip get-function-ptr* ;
|
||||
|
||||
: with-cuda-function ( string quot -- )
|
||||
[
|
||||
get-function-ptr* cuda-function set
|
||||
] dip call ; inline
|
||||
|
||||
: create-context ( flags device -- context )
|
||||
[ CUcontext <c-object> ] 2dip
|
||||
[ cuCtxCreate cuda-error ] 3keep 2drop *void* ;
|
||||
|
||||
: destroy-context ( context -- ) cuCtxDestroy cuda-error ;
|
||||
|
||||
SYMBOL: cuda-libraries
|
||||
cuda-libraries [ H{ } clone ] initialize
|
||||
|
||||
SYMBOL: current-cuda-library
|
||||
|
||||
TUPLE: cuda-library name path handle ;
|
||||
|
||||
: <cuda-library> ( name path -- obj )
|
||||
\ cuda-library new
|
||||
swap >>path
|
||||
swap >>name ;
|
||||
|
||||
: add-cuda-library ( name path -- )
|
||||
normalize-path <cuda-library>
|
||||
dup name>> cuda-libraries get-global set-at ;
|
||||
|
||||
: ?delete-at ( key assoc -- old/key ? )
|
||||
2dup delete-at* [ 2nip t ] [ 2drop f ] if ; inline
|
||||
|
||||
ERROR: no-cuda-library name ;
|
||||
|
||||
: load-module ( path -- module )
|
||||
[ CUmodule <c-object> ] dip
|
||||
[ cuModuleLoad cuda-error ] 2keep drop *void* ;
|
||||
|
||||
: unload-module ( module -- )
|
||||
cuModuleUnload cuda-error ;
|
||||
|
||||
: load-cuda-library ( library -- handle )
|
||||
path>> load-module ;
|
||||
|
||||
: lookup-cuda-library ( name -- cuda-library )
|
||||
cuda-libraries get ?at [ no-cuda-library ] unless ;
|
||||
|
||||
: remove-cuda-library ( name -- library )
|
||||
cuda-libraries get ?delete-at [ no-cuda-library ] unless ;
|
||||
|
||||
: unload-cuda-library ( name -- )
|
||||
remove-cuda-library handle>> unload-module ;
|
||||
|
||||
|
||||
: cached-module ( module-name -- alien )
|
||||
lookup-cuda-library
|
||||
cuda-modules get-global [ load-cuda-library ] cache ;
|
||||
|
||||
: cached-function ( module-name function-name -- alien )
|
||||
[ cached-module ] dip
|
||||
2array cuda-functions get [ first2 get-function-ptr* ] cache ;
|
||||
|
||||
: launch-function* ( function -- ) cuLaunch cuda-error ;
|
||||
|
||||
: launch-function ( -- ) cuda-function get cuLaunch cuda-error ;
|
||||
|
||||
: cuda-int* ( function offset value -- )
|
||||
cuParamSeti cuda-error ;
|
||||
|
||||
: cuda-int ( offset value -- )
|
||||
[ cuda-function get ] 2dip cuda-int* ;
|
||||
|
||||
: cuda-float* ( function offset value -- )
|
||||
cuParamSetf cuda-error ;
|
||||
|
||||
: cuda-float ( offset value -- )
|
||||
[ cuda-function get ] 2dip cuda-float* ;
|
||||
|
||||
: cuda-vector* ( function offset ptr n -- )
|
||||
cuParamSetv cuda-error ;
|
||||
|
||||
: cuda-vector ( offset ptr n -- )
|
||||
[ cuda-function get ] 3dip cuda-vector* ;
|
||||
|
||||
: param-size* ( function n -- )
|
||||
cuParamSetSize cuda-error ;
|
||||
|
||||
: param-size ( n -- )
|
||||
[ cuda-function get ] dip param-size* ;
|
||||
|
||||
: launch-function-grid* ( function width height -- )
|
||||
cuLaunchGrid cuda-error ;
|
||||
|
||||
: launch-function-grid ( width height -- )
|
||||
[ cuda-function get ] 2dip
|
||||
cuLaunchGrid cuda-error ;
|
||||
|
||||
: function-block-shape* ( function x y z -- )
|
||||
cuFuncSetBlockShape cuda-error ;
|
||||
|
||||
: function-block-shape ( x y z -- )
|
||||
[ cuda-function get ] 3dip
|
||||
cuFuncSetBlockShape cuda-error ;
|
||||
|
||||
: function-shared-size* ( function n -- )
|
||||
cuFuncSetSharedSize cuda-error ;
|
||||
|
||||
: function-shared-size ( n -- )
|
||||
[ cuda-function get ] dip
|
||||
cuFuncSetSharedSize cuda-error ;
|
||||
|
|
@ -265,6 +265,9 @@ struct initial_code_block_visitor {
|
|||
case RT_LITERAL:
|
||||
op.store_value(next_literal());
|
||||
break;
|
||||
case RT_FLOAT:
|
||||
op.store_float(next_literal());
|
||||
break;
|
||||
case RT_ENTRY_POINT:
|
||||
op.store_value(parent->compute_entry_point_address(next_literal()));
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -111,6 +111,9 @@ struct code_block_compaction_relocation_visitor {
|
|||
case RT_LITERAL:
|
||||
op.store_value(slot_forwarder.visit_pointer(op.load_value(old_offset)));
|
||||
break;
|
||||
case RT_FLOAT:
|
||||
op.store_float(slot_forwarder.visit_pointer(op.load_float(old_offset)));
|
||||
break;
|
||||
case RT_ENTRY_POINT:
|
||||
case RT_ENTRY_POINT_PIC:
|
||||
case RT_ENTRY_POINT_PIC_TAIL:
|
||||
|
|
|
|||
|
|
@ -185,6 +185,9 @@ struct code_block_fixup_relocation_visitor {
|
|||
case RT_LITERAL:
|
||||
op.store_value(data_visitor.visit_pointer(op.load_value(old_offset)));
|
||||
break;
|
||||
case RT_FLOAT:
|
||||
op.store_float(data_visitor.visit_pointer(op.load_float(old_offset)));
|
||||
break;
|
||||
case RT_ENTRY_POINT:
|
||||
case RT_ENTRY_POINT_PIC:
|
||||
case RT_ENTRY_POINT_PIC_TAIL:
|
||||
|
|
|
|||
|
|
@ -62,6 +62,16 @@ fixnum instruction_operand::load_value()
|
|||
return load_value(pointer);
|
||||
}
|
||||
|
||||
cell instruction_operand::load_float()
|
||||
{
|
||||
return (cell)load_value() - boxed_float_offset;
|
||||
}
|
||||
|
||||
cell instruction_operand::load_float(cell pointer)
|
||||
{
|
||||
return (cell)load_value(pointer) - boxed_float_offset;
|
||||
}
|
||||
|
||||
code_block *instruction_operand::load_code_block(cell relative_to)
|
||||
{
|
||||
return ((code_block *)load_value(relative_to) - 1);
|
||||
|
|
@ -135,6 +145,11 @@ void instruction_operand::store_value(fixnum absolute_value)
|
|||
}
|
||||
}
|
||||
|
||||
void instruction_operand::store_float(cell value)
|
||||
{
|
||||
store_value((fixnum)value + boxed_float_offset);
|
||||
}
|
||||
|
||||
void instruction_operand::store_code_block(code_block *compiled)
|
||||
{
|
||||
store_value((cell)compiled->entry_point());
|
||||
|
|
|
|||
|
|
@ -30,6 +30,9 @@ enum relocation_type {
|
|||
type since its used in a situation where relocation arguments cannot
|
||||
be passed in, and so RT_DLSYM is inappropriate (Windows only) */
|
||||
RT_EXCEPTION_HANDLER,
|
||||
/* pointer to a float's payload */
|
||||
RT_FLOAT,
|
||||
|
||||
};
|
||||
|
||||
enum relocation_class {
|
||||
|
|
@ -112,6 +115,7 @@ struct relocation_entry {
|
|||
case RT_CARDS_OFFSET:
|
||||
case RT_DECKS_OFFSET:
|
||||
case RT_EXCEPTION_HANDLER:
|
||||
case RT_FLOAT:
|
||||
return 0;
|
||||
default:
|
||||
critical_error("Bad rel type",rel_type());
|
||||
|
|
@ -152,12 +156,15 @@ struct instruction_operand {
|
|||
fixnum load_value_masked(cell mask, cell bits, cell shift);
|
||||
fixnum load_value(cell relative_to);
|
||||
fixnum load_value();
|
||||
cell load_float(cell relative_to);
|
||||
cell load_float();
|
||||
code_block *load_code_block(cell relative_to);
|
||||
code_block *load_code_block();
|
||||
|
||||
void store_value_2_2(fixnum value);
|
||||
void store_value_masked(fixnum value, cell mask, cell shift);
|
||||
void store_value(fixnum value);
|
||||
void store_float(cell value);
|
||||
void store_code_block(code_block *compiled);
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -246,6 +246,8 @@ struct wrapper : public object {
|
|||
cell object;
|
||||
};
|
||||
|
||||
const fixnum boxed_float_offset = 8 - FLOAT_TYPE;
|
||||
|
||||
/* Assembly code makes assumptions about the layout of this struct */
|
||||
struct boxed_float : object {
|
||||
static const cell type_number = FLOAT_TYPE;
|
||||
|
|
|
|||
|
|
@ -192,8 +192,17 @@ struct literal_references_visitor {
|
|||
|
||||
void operator()(instruction_operand op)
|
||||
{
|
||||
if(op.rel_type() == RT_LITERAL)
|
||||
switch(op.rel_type())
|
||||
{
|
||||
case RT_LITERAL:
|
||||
op.store_value(visitor->visit_pointer(op.load_value()));
|
||||
break;
|
||||
case RT_FLOAT:
|
||||
op.store_float(visitor->visit_pointer(op.load_float()));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue