math.vectors.simd: add saturated arithmetic operations
parent
78c949b9b7
commit
66871995c9
|
@ -1,5 +1,6 @@
|
||||||
USING: alien alien.syntax alien.c-types kernel tools.test
|
USING: alien alien.syntax alien.c-types kernel tools.test
|
||||||
sequences system libc alien.strings io.encodings.utf8 ;
|
sequences system libc alien.strings io.encodings.utf8
|
||||||
|
math.constants ;
|
||||||
IN: alien.c-types.tests
|
IN: alien.c-types.tests
|
||||||
|
|
||||||
CONSTANT: xyz 123
|
CONSTANT: xyz 123
|
||||||
|
@ -52,3 +53,9 @@ TYPEDEF: uchar* MyLPBYTE
|
||||||
os windows? cpu x86.64? and [
|
os windows? cpu x86.64? and [
|
||||||
[ -2147467259 ] [ 2147500037 <long> *long ] unit-test
|
[ -2147467259 ] [ 2147500037 <long> *long ] unit-test
|
||||||
] when
|
] when
|
||||||
|
|
||||||
|
[ 0 ] [ -10 uchar c-type-clamp ] unit-test
|
||||||
|
[ 12 ] [ 12 uchar c-type-clamp ] unit-test
|
||||||
|
[ -10 ] [ -10 char c-type-clamp ] unit-test
|
||||||
|
[ 127 ] [ 230 char c-type-clamp ] unit-test
|
||||||
|
[ t ] [ pi dup float c-type-clamp = ] unit-test
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
! Copyright (C) 2004, 2009 Slava Pestov.
|
! Copyright (C) 2004, 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: byte-arrays arrays assocs kernel kernel.private math
|
USING: byte-arrays arrays assocs kernel kernel.private math
|
||||||
namespaces make parser sequences strings words splitting math.parser
|
math.order math.parser namespaces make parser sequences strings
|
||||||
cpu.architecture alien alien.accessors alien.strings quotations
|
words splitting cpu.architecture alien alien.accessors
|
||||||
layouts system compiler.units io io.files io.encodings.binary
|
alien.strings quotations layouts system compiler.units io
|
||||||
io.streams.memory accessors combinators effects continuations fry
|
io.files io.encodings.binary io.streams.memory accessors
|
||||||
classes vocabs vocabs.loader words.symbol ;
|
combinators effects continuations fry classes vocabs
|
||||||
|
vocabs.loader words.symbol ;
|
||||||
QUALIFIED: math
|
QUALIFIED: math
|
||||||
IN: alien.c-types
|
IN: alien.c-types
|
||||||
|
|
||||||
|
@ -480,3 +481,17 @@ M: int-4-rep rep-component-type drop int ;
|
||||||
M: uint-4-rep rep-component-type drop uint ;
|
M: uint-4-rep rep-component-type drop uint ;
|
||||||
M: float-4-rep rep-component-type drop float ;
|
M: float-4-rep rep-component-type drop float ;
|
||||||
M: double-2-rep rep-component-type drop double ;
|
M: double-2-rep rep-component-type drop double ;
|
||||||
|
|
||||||
|
: (unsigned-interval) ( bytes -- from to ) [ 0 ] dip 8 * 2^ 1 - ; foldable
|
||||||
|
: unsigned-interval ( c-type -- from to ) heap-size (unsigned-interval) ; foldable
|
||||||
|
: (signed-interval) ( bytes -- from to ) 8 * 1 - 2^ [ neg ] [ 1 - ] bi ; foldable
|
||||||
|
: signed-interval ( c-type -- from to ) heap-size (signed-interval) ; foldable
|
||||||
|
|
||||||
|
: c-type-interval ( c-type -- from to )
|
||||||
|
{
|
||||||
|
{ [ dup { float double } memq? ] [ drop -1/0. 1/0. ] }
|
||||||
|
{ [ dup { char short int long longlong } memq? ] [ signed-interval ] }
|
||||||
|
{ [ dup { uchar ushort uint ulong ulonglong } memq? ] [ unsigned-interval ] }
|
||||||
|
} cond ; foldable
|
||||||
|
|
||||||
|
: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline
|
||||||
|
|
|
@ -305,7 +305,7 @@ def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##sub-vector
|
PURE-INSN: ##saturated-add-vector
|
||||||
def: dst
|
def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
@ -315,11 +315,26 @@ def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##sub-vector
|
||||||
|
def: dst
|
||||||
|
use: src1 src2
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##saturated-sub-vector
|
||||||
|
def: dst
|
||||||
|
use: src1 src2
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##mul-vector
|
PURE-INSN: ##mul-vector
|
||||||
def: dst
|
def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##saturated-mul-vector
|
||||||
|
def: dst
|
||||||
|
use: src1 src2
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##div-vector
|
PURE-INSN: ##div-vector
|
||||||
def: dst
|
def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
|
|
|
@ -151,13 +151,16 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
|
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
|
||||||
} enable-intrinsics ;
|
} enable-intrinsics ;
|
||||||
|
|
||||||
: enable-sse2-simd ( -- )
|
: enable-simd ( -- )
|
||||||
{
|
{
|
||||||
{ math.vectors.simd.intrinsics:assert-positive [ drop ] }
|
{ math.vectors.simd.intrinsics:assert-positive [ drop ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
|
||||||
|
@ -165,14 +168,10 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||||
|
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
||||||
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
|
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
|
||||||
} enable-intrinsics ;
|
} enable-intrinsics ;
|
||||||
|
|
||||||
: enable-sse3-simd ( -- )
|
|
||||||
{
|
|
||||||
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
|
||||||
} enable-intrinsics ;
|
|
||||||
|
|
||||||
: emit-intrinsic ( node word -- )
|
: emit-intrinsic ( node word -- )
|
||||||
"intrinsic" word-prop call( node -- ) ;
|
"intrinsic" word-prop call( node -- ) ;
|
||||||
|
|
|
@ -47,9 +47,12 @@ UNION: two-operand-insn
|
||||||
##min-float
|
##min-float
|
||||||
##max-float
|
##max-float
|
||||||
##add-vector
|
##add-vector
|
||||||
##sub-vector
|
##saturated-add-vector
|
||||||
##add-sub-vector
|
##add-sub-vector
|
||||||
|
##sub-vector
|
||||||
|
##saturated-sub-vector
|
||||||
##mul-vector
|
##mul-vector
|
||||||
|
##saturated-mul-vector
|
||||||
##div-vector
|
##div-vector
|
||||||
##min-vector
|
##min-vector
|
||||||
##max-vector ;
|
##max-vector ;
|
||||||
|
|
|
@ -169,9 +169,12 @@ CODEGEN: ##gather-vector-2 %gather-vector-2
|
||||||
CODEGEN: ##gather-vector-4 %gather-vector-4
|
CODEGEN: ##gather-vector-4 %gather-vector-4
|
||||||
CODEGEN: ##box-vector %box-vector
|
CODEGEN: ##box-vector %box-vector
|
||||||
CODEGEN: ##add-vector %add-vector
|
CODEGEN: ##add-vector %add-vector
|
||||||
CODEGEN: ##sub-vector %sub-vector
|
CODEGEN: ##saturated-add-vector %saturated-add-vector
|
||||||
CODEGEN: ##add-sub-vector %add-sub-vector
|
CODEGEN: ##add-sub-vector %add-sub-vector
|
||||||
|
CODEGEN: ##sub-vector %sub-vector
|
||||||
|
CODEGEN: ##saturated-sub-vector %saturated-sub-vector
|
||||||
CODEGEN: ##mul-vector %mul-vector
|
CODEGEN: ##mul-vector %mul-vector
|
||||||
|
CODEGEN: ##saturated-mul-vector %saturated-mul-vector
|
||||||
CODEGEN: ##div-vector %div-vector
|
CODEGEN: ##div-vector %div-vector
|
||||||
CODEGEN: ##min-vector %min-vector
|
CODEGEN: ##min-vector %min-vector
|
||||||
CODEGEN: ##max-vector %max-vector
|
CODEGEN: ##max-vector %max-vector
|
||||||
|
|
|
@ -18,6 +18,7 @@ compiler.tree.propagation.constraints
|
||||||
compiler.tree.propagation.call-effect
|
compiler.tree.propagation.call-effect
|
||||||
compiler.tree.propagation.transforms
|
compiler.tree.propagation.transforms
|
||||||
compiler.tree.propagation.simd ;
|
compiler.tree.propagation.simd ;
|
||||||
|
FROM: alien.c-types => (signed-interval) (unsigned-interval) ;
|
||||||
IN: compiler.tree.propagation.known-words
|
IN: compiler.tree.propagation.known-words
|
||||||
|
|
||||||
{ + - * / }
|
{ + - * / }
|
||||||
|
@ -260,15 +261,9 @@ generic-comparison-ops [
|
||||||
alien-unsigned-8
|
alien-unsigned-8
|
||||||
} [
|
} [
|
||||||
dup name>> {
|
dup name>> {
|
||||||
{
|
{ [ "alien-signed-" ?head ] [ string>number (signed-interval) ] }
|
||||||
[ "alien-signed-" ?head ]
|
{ [ "alien-unsigned-" ?head ] [ string>number (unsigned-interval) ] }
|
||||||
[ string>number 8 * 1 - 2^ dup neg swap 1 - [a,b] ]
|
} cond [a,b]
|
||||||
}
|
|
||||||
{
|
|
||||||
[ "alien-unsigned-" ?head ]
|
|
||||||
[ string>number 8 * 2^ 1 - 0 swap [a,b] ]
|
|
||||||
}
|
|
||||||
} cond
|
|
||||||
[ fits-in-fixnum? fixnum integer ? ] keep <class/interval-info>
|
[ fits-in-fixnum? fixnum integer ? ] keep <class/interval-info>
|
||||||
'[ 2drop _ ] "outputs" set-word-prop
|
'[ 2drop _ ] "outputs" set-word-prop
|
||||||
] each
|
] each
|
||||||
|
|
|
@ -181,9 +181,12 @@ HOOK: %broadcast-vector cpu ( dst src rep -- )
|
||||||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||||
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
@ -194,9 +197,12 @@ HOOK: %broadcast-vector-reps cpu ( -- reps )
|
||||||
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
||||||
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
||||||
HOOK: %add-vector-reps cpu ( -- reps )
|
HOOK: %add-vector-reps cpu ( -- reps )
|
||||||
HOOK: %sub-vector-reps cpu ( -- reps )
|
HOOK: %saturated-add-vector-reps cpu ( -- reps )
|
||||||
HOOK: %add-sub-vector-reps cpu ( -- reps )
|
HOOK: %add-sub-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %sub-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %saturated-sub-vector-reps cpu ( -- reps )
|
||||||
HOOK: %mul-vector-reps cpu ( -- reps )
|
HOOK: %mul-vector-reps cpu ( -- reps )
|
||||||
|
HOOK: %saturated-mul-vector-reps cpu ( -- reps )
|
||||||
HOOK: %div-vector-reps cpu ( -- reps )
|
HOOK: %div-vector-reps cpu ( -- reps )
|
||||||
HOOK: %min-vector-reps cpu ( -- reps )
|
HOOK: %min-vector-reps cpu ( -- reps )
|
||||||
HOOK: %max-vector-reps cpu ( -- reps )
|
HOOK: %max-vector-reps cpu ( -- reps )
|
||||||
|
|
|
@ -323,6 +323,30 @@ M: x86 %add-vector-reps
|
||||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
|
M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
|
||||||
|
{
|
||||||
|
{ char-16-rep [ PADDSB ] }
|
||||||
|
{ uchar-16-rep [ PADDUSB ] }
|
||||||
|
{ short-8-rep [ PADDSW ] }
|
||||||
|
{ ushort-8-rep [ PADDUSW ] }
|
||||||
|
} case drop ;
|
||||||
|
|
||||||
|
M: x86 %saturated-add-vector-reps
|
||||||
|
{
|
||||||
|
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
|
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
|
||||||
|
{
|
||||||
|
{ float-4-rep [ ADDSUBPS ] }
|
||||||
|
{ double-2-rep [ ADDSUBPD ] }
|
||||||
|
} case drop ;
|
||||||
|
|
||||||
|
M: x86 %add-sub-vector-reps
|
||||||
|
{
|
||||||
|
{ sse3? { float-4-rep double-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %sub-vector ( dst src1 src2 rep -- )
|
M: x86 %sub-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ SUBPS ] }
|
{ float-4-rep [ SUBPS ] }
|
||||||
|
@ -341,15 +365,17 @@ M: x86 %sub-vector-reps
|
||||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
|
M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ ADDSUBPS ] }
|
{ char-16-rep [ PSUBSB ] }
|
||||||
{ double-2-rep [ ADDSUBPD ] }
|
{ uchar-16-rep [ PSUBUSB ] }
|
||||||
|
{ short-8-rep [ PSUBSW ] }
|
||||||
|
{ ushort-8-rep [ PSUBUSW ] }
|
||||||
} case drop ;
|
} case drop ;
|
||||||
|
|
||||||
M: x86 %add-sub-vector-reps
|
M: x86 %saturated-sub-vector-reps
|
||||||
{
|
{
|
||||||
{ sse3? { float-4-rep double-2-rep } }
|
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %mul-vector ( dst src1 src2 rep -- )
|
M: x86 %mul-vector ( dst src1 src2 rep -- )
|
||||||
|
@ -368,6 +394,10 @@ M: x86 %mul-vector-reps
|
||||||
{ sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
{ sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
|
M: x86 %saturated-mul-vector-reps
|
||||||
|
! No multiplication with saturation on x86
|
||||||
|
{ } ;
|
||||||
|
|
||||||
M: x86 %div-vector ( dst src1 src2 rep -- )
|
M: x86 %div-vector ( dst src1 src2 rep -- )
|
||||||
{
|
{
|
||||||
{ float-4-rep [ DIVPS ] }
|
{ float-4-rep [ DIVPS ] }
|
||||||
|
@ -854,46 +884,29 @@ M: x86 small-enough? ( n -- ? )
|
||||||
#! set up by the caller.
|
#! set up by the caller.
|
||||||
stack-frame get total-size>> + stack@ ;
|
stack-frame get total-size>> + stack@ ;
|
||||||
|
|
||||||
: enable-sse2 ( -- )
|
:: install-sse2-check ( -- )
|
||||||
enable-float-intrinsics
|
|
||||||
enable-fsqrt
|
|
||||||
enable-float-min/max
|
|
||||||
enable-sse2-simd ;
|
|
||||||
|
|
||||||
: enable-sse3 ( -- )
|
|
||||||
enable-sse2
|
|
||||||
enable-sse3-simd ;
|
|
||||||
|
|
||||||
enable-min/max
|
|
||||||
|
|
||||||
:: install-sse-check ( version -- )
|
|
||||||
[
|
[
|
||||||
sse-version version < [
|
sse-version 20 < [
|
||||||
"This image was built to use " write
|
"This image was built to use SSE2 but your CPU does not support it." print
|
||||||
version sse-string write
|
|
||||||
" but your CPU only supports " write
|
|
||||||
sse-version sse-string write "." print
|
|
||||||
"You will need to bootstrap Factor again." print
|
"You will need to bootstrap Factor again." print
|
||||||
flush
|
flush
|
||||||
1 exit
|
1 exit
|
||||||
] when
|
] when
|
||||||
] "cpu.x86" add-init-hook ;
|
] "cpu.x86" add-init-hook ;
|
||||||
|
|
||||||
: enable-sse ( version -- )
|
: enable-sse2 ( version -- )
|
||||||
{
|
20 >= [
|
||||||
{ 00 [ ] }
|
enable-float-intrinsics
|
||||||
{ 10 [ ] }
|
enable-fsqrt
|
||||||
{ 20 [ enable-sse2 ] }
|
enable-float-min/max
|
||||||
{ 30 [ enable-sse3 ] }
|
install-sse2-check
|
||||||
{ 33 [ enable-sse3 ] }
|
] when ;
|
||||||
{ 41 [ enable-sse3 ] }
|
|
||||||
{ 42 [ enable-sse3 ] }
|
|
||||||
} case ;
|
enable-simd
|
||||||
|
enable-min/max
|
||||||
|
|
||||||
: check-sse ( -- )
|
: check-sse ( -- )
|
||||||
[ { sse_version } compile ] with-optimizer
|
[ { sse_version } compile ] with-optimizer
|
||||||
|
"Checking for multimedia extensions: " write sse-version
|
||||||
"Checking for multimedia extensions: " write sse-version 30 min
|
[ sse-string write " detected" print ] [ enable-sse2 ] bi ;
|
||||||
[ sse-string write " detected" print ]
|
|
||||||
[ install-sse-check ]
|
|
||||||
[ enable-sse ] tri ;
|
|
||||||
|
|
|
@ -41,9 +41,12 @@ MACRO: simd-boa ( rep class -- simd-array )
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
{ v+ (simd-v+) }
|
{ v+ (simd-v+) }
|
||||||
{ v- (simd-v-) }
|
{ vs+ (simd-vs+) }
|
||||||
{ v+- (simd-v+-) }
|
{ v+- (simd-v+-) }
|
||||||
|
{ v- (simd-v-) }
|
||||||
|
{ vs- (simd-vs-) }
|
||||||
{ v* (simd-v*) }
|
{ v* (simd-v*) }
|
||||||
|
{ vs* (simd-vs*) }
|
||||||
{ v/ (simd-v/) }
|
{ v/ (simd-v/) }
|
||||||
{ vmin (simd-vmin) }
|
{ vmin (simd-vmin) }
|
||||||
{ vmax (simd-vmax) }
|
{ vmax (simd-vmax) }
|
||||||
|
@ -111,7 +114,7 @@ A{ DEFINES ${A}{
|
||||||
NTH [ T dup c-type-getter-boxer array-accessor ]
|
NTH [ T dup c-type-getter-boxer array-accessor ]
|
||||||
SET-NTH [ T dup c-setter array-accessor ]
|
SET-NTH [ T dup c-setter array-accessor ]
|
||||||
|
|
||||||
A-rep IS ${A}-rep
|
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
|
||||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||||
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
||||||
|
|
||||||
|
@ -142,6 +145,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
|
||||||
|
|
||||||
M: A byte-length underlying>> length ; inline
|
M: A byte-length underlying>> length ; inline
|
||||||
|
|
||||||
|
M: A element-type drop A-rep rep-component-type ;
|
||||||
|
|
||||||
M: A pprint-delims drop \ A{ \ } ;
|
M: A pprint-delims drop \ A{ \ } ;
|
||||||
|
|
||||||
M: A >pprint-sequence ;
|
M: A >pprint-sequence ;
|
||||||
|
@ -172,9 +177,12 @@ INSTANCE: A sequence
|
||||||
|
|
||||||
\ A \ A-with \ A-rep H{
|
\ A \ A-with \ A-rep H{
|
||||||
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
||||||
|
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
|
||||||
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
|
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
|
||||||
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
||||||
|
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
|
||||||
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
||||||
|
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
|
||||||
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
||||||
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
|
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
|
||||||
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
|
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
|
||||||
|
@ -227,7 +235,7 @@ A{ DEFINES ${A}{
|
||||||
|
|
||||||
A-deref DEFINES-PRIVATE ${A}-deref
|
A-deref DEFINES-PRIVATE ${A}-deref
|
||||||
|
|
||||||
A-rep IS ${A/2}-rep
|
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
|
||||||
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
|
||||||
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
|
||||||
|
|
||||||
|
@ -267,6 +275,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
|
||||||
|
|
||||||
M: A byte-length drop 32 ; inline
|
M: A byte-length drop 32 ; inline
|
||||||
|
|
||||||
|
M: A element-type drop A-rep rep-component-type ;
|
||||||
|
|
||||||
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
||||||
|
|
||||||
M: A pprint-delims drop \ A{ \ } ;
|
M: A pprint-delims drop \ A{ \ } ;
|
||||||
|
@ -298,9 +308,12 @@ INSTANCE: A sequence
|
||||||
|
|
||||||
\ A \ A-with \ A-rep H{
|
\ A \ A-with \ A-rep H{
|
||||||
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
|
||||||
|
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
|
||||||
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
|
||||||
|
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
|
||||||
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
|
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
|
||||||
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
|
||||||
|
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
|
||||||
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
|
||||||
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
|
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
|
||||||
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
|
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
|
||||||
|
|
|
@ -8,8 +8,11 @@ IN: math.vectors.simd.intrinsics
|
||||||
ERROR: bad-simd-call ;
|
ERROR: bad-simd-call ;
|
||||||
|
|
||||||
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||||
: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
|
||||||
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||||
|
: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||||
|
: (simd-vs+) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||||
|
: (simd-vs-) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||||
|
: (simd-vs*) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||||
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||||
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||||
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
|
||||||
|
@ -68,9 +71,12 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
||||||
M: vector-rep supported-simd-op?
|
M: vector-rep supported-simd-op?
|
||||||
{
|
{
|
||||||
{ \ (simd-v+) [ %add-vector-reps ] }
|
{ \ (simd-v+) [ %add-vector-reps ] }
|
||||||
|
{ \ (simd-vs+) [ %saturated-add-vector-reps ] }
|
||||||
{ \ (simd-v+-) [ %add-sub-vector-reps ] }
|
{ \ (simd-v+-) [ %add-sub-vector-reps ] }
|
||||||
{ \ (simd-v-) [ %sub-vector-reps ] }
|
{ \ (simd-v-) [ %sub-vector-reps ] }
|
||||||
|
{ \ (simd-vs-) [ %saturated-sub-vector-reps ] }
|
||||||
{ \ (simd-v*) [ %mul-vector-reps ] }
|
{ \ (simd-v*) [ %mul-vector-reps ] }
|
||||||
|
{ \ (simd-vs*) [ %saturated-mul-vector-reps ] }
|
||||||
{ \ (simd-v/) [ %div-vector-reps ] }
|
{ \ (simd-v/) [ %div-vector-reps ] }
|
||||||
{ \ (simd-vmin) [ %min-vector-reps ] }
|
{ \ (simd-vmin) [ %min-vector-reps ] }
|
||||||
{ \ (simd-vmax) [ %max-vector-reps ] }
|
{ \ (simd-vmax) [ %max-vector-reps ] }
|
||||||
|
|
|
@ -161,8 +161,12 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
|
||||||
$nl
|
$nl
|
||||||
"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
|
"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
|
||||||
{ $subsection (simd-v+) }
|
{ $subsection (simd-v+) }
|
||||||
{ $subsection (simd-v-) }
|
{ $subsection (simd-vs+) }
|
||||||
{ $subsection (simd-v+-) }
|
{ $subsection (simd-v+-) }
|
||||||
|
{ $subsection (simd-v-) }
|
||||||
|
{ $subsection (simd-vs-) }
|
||||||
|
{ $subsection (simd-v*) }
|
||||||
|
{ $subsection (simd-vs*) }
|
||||||
{ $subsection (simd-v/) }
|
{ $subsection (simd-v/) }
|
||||||
{ $subsection (simd-vmin) }
|
{ $subsection (simd-vmin) }
|
||||||
{ $subsection (simd-vmax) }
|
{ $subsection (simd-vmax) }
|
||||||
|
|
|
@ -53,11 +53,14 @@ H{
|
||||||
{ norm-sq { +vector+ -> +nonnegative+ } }
|
{ norm-sq { +vector+ -> +nonnegative+ } }
|
||||||
{ normalize { +vector+ -> +vector+ } }
|
{ normalize { +vector+ -> +vector+ } }
|
||||||
{ v* { +vector+ +vector+ -> +vector+ } }
|
{ v* { +vector+ +vector+ -> +vector+ } }
|
||||||
|
{ vs* { +vector+ +vector+ -> +vector+ } }
|
||||||
{ v*n { +vector+ +scalar+ -> +vector+ } }
|
{ v*n { +vector+ +scalar+ -> +vector+ } }
|
||||||
{ v+ { +vector+ +vector+ -> +vector+ } }
|
{ v+ { +vector+ +vector+ -> +vector+ } }
|
||||||
|
{ vs+ { +vector+ +vector+ -> +vector+ } }
|
||||||
{ v+- { +vector+ +vector+ -> +vector+ } }
|
{ v+- { +vector+ +vector+ -> +vector+ } }
|
||||||
{ v+n { +vector+ +scalar+ -> +vector+ } }
|
{ v+n { +vector+ +scalar+ -> +vector+ } }
|
||||||
{ v- { +vector+ +vector+ -> +vector+ } }
|
{ v- { +vector+ +vector+ -> +vector+ } }
|
||||||
|
{ vs- { +vector+ +vector+ -> +vector+ } }
|
||||||
{ v-n { +vector+ +scalar+ -> +vector+ } }
|
{ v-n { +vector+ +scalar+ -> +vector+ } }
|
||||||
{ v. { +vector+ +vector+ -> +scalar+ } }
|
{ v. { +vector+ +vector+ -> +scalar+ } }
|
||||||
{ v/ { +vector+ +vector+ -> +vector+ } }
|
{ v/ { +vector+ +vector+ -> +vector+ } }
|
||||||
|
|
|
@ -26,7 +26,11 @@ $nl
|
||||||
{ $subsection v. }
|
{ $subsection v. }
|
||||||
{ $subsection norm }
|
{ $subsection norm }
|
||||||
{ $subsection norm-sq }
|
{ $subsection norm-sq }
|
||||||
{ $subsection normalize } ;
|
{ $subsection normalize }
|
||||||
|
"Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:"
|
||||||
|
{ $subsection vs+ }
|
||||||
|
{ $subsection vs- }
|
||||||
|
{ $subsection vs* } ;
|
||||||
|
|
||||||
ABOUT: "math-vectors"
|
ABOUT: "math-vectors"
|
||||||
|
|
||||||
|
@ -100,6 +104,34 @@ HELP: v.
|
||||||
{ $snippet "0 [ conjugate * + ] 2reduce" }
|
{ $snippet "0 [ conjugate * + ] 2reduce" }
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
HELP: vs+
|
||||||
|
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||||
|
{ $description "Adds " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." }
|
||||||
|
{ $examples
|
||||||
|
"With saturation:"
|
||||||
|
{ $example
|
||||||
|
"USING: math.vectors prettyprint specialized-arrays ;"
|
||||||
|
"SPECIALIZED-ARRAY: uchar"
|
||||||
|
"uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } vs+ ."
|
||||||
|
"uchar-array{ 170 255 220 }"
|
||||||
|
}
|
||||||
|
"Without saturation:"
|
||||||
|
{ $example
|
||||||
|
"USING: math.vectors prettyprint specialized-arrays ;"
|
||||||
|
"SPECIALIZED-ARRAY: uchar"
|
||||||
|
"uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } v+ ."
|
||||||
|
"uchar-array{ 170 14 220 }"
|
||||||
|
}
|
||||||
|
} ;
|
||||||
|
|
||||||
|
HELP: vs-
|
||||||
|
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||||
|
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise with saturation." } ;
|
||||||
|
|
||||||
|
HELP: vs*
|
||||||
|
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
|
||||||
|
{ $description "Multiplies " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } ;
|
||||||
|
|
||||||
HELP: norm-sq
|
HELP: norm-sq
|
||||||
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
|
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
|
||||||
{ $description "Computes the squared length of a mathematical vector." } ;
|
{ $description "Computes the squared length of a mathematical vector." } ;
|
||||||
|
@ -120,3 +152,5 @@ HELP: set-axis
|
||||||
{ 2map v+ v- v* v/ } related-words
|
{ 2map v+ v- v* v/ } related-words
|
||||||
|
|
||||||
{ 2reduce v. } related-words
|
{ 2reduce v. } related-words
|
||||||
|
|
||||||
|
{ vs+ vs- vs* } related-words
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
! Copyright (C) 2005, 2008 Slava Pestov.
|
! Copyright (C) 2005, 2008 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: arrays kernel sequences math math.functions hints
|
USING: arrays alien.c-types kernel sequences math math.functions
|
||||||
math.order ;
|
hints math.order fry ;
|
||||||
IN: math.vectors
|
IN: math.vectors
|
||||||
|
|
||||||
|
GENERIC: element-type ( obj -- c-type )
|
||||||
|
|
||||||
: vneg ( u -- v ) [ neg ] map ;
|
: vneg ( u -- v ) [ neg ] map ;
|
||||||
|
|
||||||
: v+n ( u n -- v ) [ + ] curry map ;
|
: v+n ( u n -- v ) [ + ] curry map ;
|
||||||
|
@ -29,6 +31,13 @@ IN: math.vectors
|
||||||
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map
|
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map
|
||||||
nip ;
|
nip ;
|
||||||
|
|
||||||
|
: 2saturate-map ( u v quot -- w )
|
||||||
|
pick element-type '[ @ _ c-type-clamp ] 2map ; inline
|
||||||
|
|
||||||
|
: vs+ ( u v -- w ) [ + ] 2saturate-map ;
|
||||||
|
: vs- ( u v -- w ) [ - ] 2saturate-map ;
|
||||||
|
: vs* ( u v -- w ) [ * ] 2saturate-map ;
|
||||||
|
|
||||||
: vfloor ( v -- _v_ ) [ floor ] map ;
|
: vfloor ( v -- _v_ ) [ floor ] map ;
|
||||||
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
|
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
|
||||||
: vtruncate ( v -- -v- ) [ truncate ] map ;
|
: vtruncate ( v -- -v- ) [ truncate ] map ;
|
||||||
|
|
|
@ -4,7 +4,7 @@ specialized-arrays.private sequences alien.c-types accessors
|
||||||
kernel arrays combinators compiler compiler.units classes.struct
|
kernel arrays combinators compiler compiler.units classes.struct
|
||||||
combinators.smart compiler.tree.debugger math libc destructors
|
combinators.smart compiler.tree.debugger math libc destructors
|
||||||
sequences.private multiline eval words vocabs namespaces
|
sequences.private multiline eval words vocabs namespaces
|
||||||
assocs prettyprint alien.data ;
|
assocs prettyprint alien.data math.vectors ;
|
||||||
FROM: alien.c-types => float ;
|
FROM: alien.c-types => float ;
|
||||||
|
|
||||||
SPECIALIZED-ARRAY: int
|
SPECIALIZED-ARRAY: int
|
||||||
|
@ -13,6 +13,9 @@ SPECIALIZED-ARRAY: ushort
|
||||||
SPECIALIZED-ARRAY: char
|
SPECIALIZED-ARRAY: char
|
||||||
SPECIALIZED-ARRAY: uint
|
SPECIALIZED-ARRAY: uint
|
||||||
SPECIALIZED-ARRAY: float
|
SPECIALIZED-ARRAY: float
|
||||||
|
SPECIALIZED-ARRAY: ulonglong
|
||||||
|
|
||||||
|
[ ulonglong ] [ ulonglong-array{ } element-type ] unit-test
|
||||||
|
|
||||||
[ t ] [ { 1 2 3 } >int-array int-array? ] unit-test
|
[ t ] [ { 1 2 3 } >int-array int-array? ] unit-test
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
! Copyright (C) 2008, 2009 Slava Pestov.
|
! Copyright (C) 2008, 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors alien alien.c-types alien.data alien.parser assocs
|
USING: accessors alien alien.c-types alien.data alien.parser
|
||||||
byte-arrays classes compiler.units functors kernel lexer libc math
|
assocs byte-arrays classes compiler.units functors kernel lexer
|
||||||
math.vectors.specialization namespaces parser prettyprint.custom
|
libc math math.vectors math.vectors.specialization namespaces
|
||||||
sequences sequences.private strings summary vocabs vocabs.loader
|
parser prettyprint.custom sequences sequences.private strings
|
||||||
vocabs.parser vocabs.generated words fry combinators ;
|
summary vocabs vocabs.loader vocabs.parser vocabs.generated
|
||||||
|
words fry combinators present ;
|
||||||
IN: specialized-arrays
|
IN: specialized-arrays
|
||||||
|
|
||||||
MIXIN: specialized-array
|
MIXIN: specialized-array
|
||||||
|
@ -53,14 +54,14 @@ TUPLE: A
|
||||||
|
|
||||||
: <direct-A> ( alien len -- specialized-array ) A boa ; inline
|
: <direct-A> ( alien len -- specialized-array ) A boa ; inline
|
||||||
|
|
||||||
: <A> ( n -- specialized-array ) [ T <underlying> ] keep <direct-A> ; inline
|
: <A> ( n -- specialized-array ) [ \ T <underlying> ] keep <direct-A> ; inline
|
||||||
|
|
||||||
: (A) ( n -- specialized-array ) [ T (underlying) ] keep <direct-A> ; inline
|
: (A) ( n -- specialized-array ) [ \ T (underlying) ] keep <direct-A> ; inline
|
||||||
|
|
||||||
: malloc-A ( len -- specialized-array ) [ T heap-size calloc ] keep <direct-A> ; inline
|
: malloc-A ( len -- specialized-array ) [ \ T heap-size calloc ] keep <direct-A> ; inline
|
||||||
|
|
||||||
: byte-array>A ( byte-array -- specialized-array )
|
: byte-array>A ( byte-array -- specialized-array )
|
||||||
dup length T heap-size /mod 0 = [ drop T bad-byte-array-length ] unless
|
dup length \ T heap-size /mod 0 = [ drop \ T bad-byte-array-length ] unless
|
||||||
<direct-A> ; inline
|
<direct-A> ; inline
|
||||||
|
|
||||||
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
|
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
|
||||||
|
@ -81,12 +82,14 @@ M: A equal? over A instance? [ sequence= ] [ 2drop f ] if ;
|
||||||
|
|
||||||
M: A resize
|
M: A resize
|
||||||
[
|
[
|
||||||
[ T heap-size * ] [ underlying>> ] bi*
|
[ \ T heap-size * ] [ underlying>> ] bi*
|
||||||
resize-byte-array
|
resize-byte-array
|
||||||
] [ drop ] 2bi
|
] [ drop ] 2bi
|
||||||
<direct-A> ; inline
|
<direct-A> ; inline
|
||||||
|
|
||||||
M: A byte-length length T heap-size * ; inline
|
M: A byte-length length \ T heap-size * ; inline
|
||||||
|
|
||||||
|
M: A element-type drop \ T ; inline
|
||||||
|
|
||||||
M: A direct-array-syntax drop \ A@ ;
|
M: A direct-array-syntax drop \ A@ ;
|
||||||
|
|
||||||
|
@ -116,15 +119,15 @@ M: word (underlying-type) "c-type" word-prop ;
|
||||||
} cond ;
|
} cond ;
|
||||||
|
|
||||||
: underlying-type-name ( c-type -- name )
|
: underlying-type-name ( c-type -- name )
|
||||||
underlying-type dup word? [ name>> ] when ;
|
underlying-type present ;
|
||||||
|
|
||||||
: specialized-array-vocab ( c-type -- vocab )
|
: specialized-array-vocab ( c-type -- vocab )
|
||||||
"specialized-arrays.instances." prepend ;
|
present "specialized-arrays.instances." prepend ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: define-array-vocab ( type -- vocab )
|
: define-array-vocab ( type -- vocab )
|
||||||
underlying-type-name
|
underlying-type
|
||||||
[ specialized-array-vocab ] [ '[ _ define-array ] ] bi
|
[ specialized-array-vocab ] [ '[ _ define-array ] ] bi
|
||||||
generate-vocab ;
|
generate-vocab ;
|
||||||
|
|
||||||
|
|
|
@ -77,12 +77,12 @@ DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot)):
|
||||||
DEF(bool,sse_version,(void)):
|
DEF(bool,sse_version,(void)):
|
||||||
mov $0x1,RETURN_REG
|
mov $0x1,RETURN_REG
|
||||||
cpuid
|
cpuid
|
||||||
/* test $0x100000,%ecx
|
test $0x100000,%ecx
|
||||||
jnz sse_42
|
jnz sse_42
|
||||||
test $0x80000,%ecx
|
test $0x80000,%ecx
|
||||||
jnz sse_41
|
jnz sse_41
|
||||||
test $0x200,%ecx
|
test $0x200,%ecx
|
||||||
jnz ssse_3 */
|
jnz ssse_3
|
||||||
test $0x1,%ecx
|
test $0x1,%ecx
|
||||||
jnz sse_3
|
jnz sse_3
|
||||||
test $0x4000000,%edx
|
test $0x4000000,%edx
|
||||||
|
|
Loading…
Reference in New Issue