math.vectors.simd: add saturated arithmetic operations

Slava Pestov 2009-09-20 23:16:02 -05:00
parent 78c949b9b7
commit 66871995c9
18 changed files with 220 additions and 89 deletions

View File

@ -1,5 +1,6 @@
USING: alien alien.syntax alien.c-types kernel tools.test USING: alien alien.syntax alien.c-types kernel tools.test
sequences system libc alien.strings io.encodings.utf8 ; sequences system libc alien.strings io.encodings.utf8
math.constants ;
IN: alien.c-types.tests IN: alien.c-types.tests
CONSTANT: xyz 123 CONSTANT: xyz 123
@ -52,3 +53,9 @@ TYPEDEF: uchar* MyLPBYTE
os windows? cpu x86.64? and [ os windows? cpu x86.64? and [
[ -2147467259 ] [ 2147500037 <long> *long ] unit-test [ -2147467259 ] [ 2147500037 <long> *long ] unit-test
] when ] when
[ 0 ] [ -10 uchar c-type-clamp ] unit-test
[ 12 ] [ 12 uchar c-type-clamp ] unit-test
[ -10 ] [ -10 char c-type-clamp ] unit-test
[ 127 ] [ 230 char c-type-clamp ] unit-test
[ t ] [ pi dup float c-type-clamp = ] unit-test

View File

@ -1,11 +1,12 @@
! Copyright (C) 2004, 2009 Slava Pestov. ! Copyright (C) 2004, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: byte-arrays arrays assocs kernel kernel.private math USING: byte-arrays arrays assocs kernel kernel.private math
namespaces make parser sequences strings words splitting math.parser math.order math.parser namespaces make parser sequences strings
cpu.architecture alien alien.accessors alien.strings quotations words splitting cpu.architecture alien alien.accessors
layouts system compiler.units io io.files io.encodings.binary alien.strings quotations layouts system compiler.units io
io.streams.memory accessors combinators effects continuations fry io.files io.encodings.binary io.streams.memory accessors
classes vocabs vocabs.loader words.symbol ; combinators effects continuations fry classes vocabs
vocabs.loader words.symbol ;
QUALIFIED: math QUALIFIED: math
IN: alien.c-types IN: alien.c-types
@ -480,3 +481,17 @@ M: int-4-rep rep-component-type drop int ;
M: uint-4-rep rep-component-type drop uint ; M: uint-4-rep rep-component-type drop uint ;
M: float-4-rep rep-component-type drop float ; M: float-4-rep rep-component-type drop float ;
M: double-2-rep rep-component-type drop double ; M: double-2-rep rep-component-type drop double ;
: (unsigned-interval) ( bytes -- from to ) [ 0 ] dip 8 * 2^ 1 - ; foldable
: unsigned-interval ( c-type -- from to ) heap-size (unsigned-interval) ; foldable
: (signed-interval) ( bytes -- from to ) 8 * 1 - 2^ [ neg ] [ 1 - ] bi ; foldable
: signed-interval ( c-type -- from to ) heap-size (signed-interval) ; foldable
: c-type-interval ( c-type -- from to )
{
{ [ dup { float double } memq? ] [ drop -1/0. 1/0. ] }
{ [ dup { char short int long longlong } memq? ] [ signed-interval ] }
{ [ dup { uchar ushort uint ulong ulonglong } memq? ] [ unsigned-interval ] }
} cond ; foldable
: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline

View File

@ -305,7 +305,7 @@ def: dst
use: src1 src2 use: src1 src2
literal: rep ; literal: rep ;
PURE-INSN: ##sub-vector PURE-INSN: ##saturated-add-vector
def: dst def: dst
use: src1 src2 use: src1 src2
literal: rep ; literal: rep ;
@ -315,11 +315,26 @@ def: dst
use: src1 src2 use: src1 src2
literal: rep ; literal: rep ;
PURE-INSN: ##sub-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##saturated-sub-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##mul-vector PURE-INSN: ##mul-vector
def: dst def: dst
use: src1 src2 use: src1 src2
literal: rep ; literal: rep ;
PURE-INSN: ##saturated-mul-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##div-vector PURE-INSN: ##div-vector
def: dst def: dst
use: src1 src2 use: src1 src2

View File

@ -151,13 +151,16 @@ IN: compiler.cfg.intrinsics
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] } { math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
} enable-intrinsics ; } enable-intrinsics ;
: enable-sse2-simd ( -- ) : enable-simd ( -- )
{ {
{ math.vectors.simd.intrinsics:assert-positive [ drop ] } { math.vectors.simd.intrinsics:assert-positive [ drop ] }
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
@ -165,14 +168,10 @@ IN: compiler.cfg.intrinsics
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] } { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
} enable-intrinsics ; } enable-intrinsics ;
: enable-sse3-simd ( -- )
{
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
} enable-intrinsics ;
: emit-intrinsic ( node word -- ) : emit-intrinsic ( node word -- )
"intrinsic" word-prop call( node -- ) ; "intrinsic" word-prop call( node -- ) ;

View File

@ -47,9 +47,12 @@ UNION: two-operand-insn
##min-float ##min-float
##max-float ##max-float
##add-vector ##add-vector
##sub-vector ##saturated-add-vector
##add-sub-vector ##add-sub-vector
##sub-vector
##saturated-sub-vector
##mul-vector ##mul-vector
##saturated-mul-vector
##div-vector ##div-vector
##min-vector ##min-vector
##max-vector ; ##max-vector ;

View File

@ -169,9 +169,12 @@ CODEGEN: ##gather-vector-2 %gather-vector-2
CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##gather-vector-4 %gather-vector-4
CODEGEN: ##box-vector %box-vector CODEGEN: ##box-vector %box-vector
CODEGEN: ##add-vector %add-vector CODEGEN: ##add-vector %add-vector
CODEGEN: ##sub-vector %sub-vector CODEGEN: ##saturated-add-vector %saturated-add-vector
CODEGEN: ##add-sub-vector %add-sub-vector CODEGEN: ##add-sub-vector %add-sub-vector
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##saturated-sub-vector %saturated-sub-vector
CODEGEN: ##mul-vector %mul-vector CODEGEN: ##mul-vector %mul-vector
CODEGEN: ##saturated-mul-vector %saturated-mul-vector
CODEGEN: ##div-vector %div-vector CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector CODEGEN: ##max-vector %max-vector

View File

@ -18,6 +18,7 @@ compiler.tree.propagation.constraints
compiler.tree.propagation.call-effect compiler.tree.propagation.call-effect
compiler.tree.propagation.transforms compiler.tree.propagation.transforms
compiler.tree.propagation.simd ; compiler.tree.propagation.simd ;
FROM: alien.c-types => (signed-interval) (unsigned-interval) ;
IN: compiler.tree.propagation.known-words IN: compiler.tree.propagation.known-words
{ + - * / } { + - * / }
@ -260,15 +261,9 @@ generic-comparison-ops [
alien-unsigned-8 alien-unsigned-8
} [ } [
dup name>> { dup name>> {
{ { [ "alien-signed-" ?head ] [ string>number (signed-interval) ] }
[ "alien-signed-" ?head ] { [ "alien-unsigned-" ?head ] [ string>number (unsigned-interval) ] }
[ string>number 8 * 1 - 2^ dup neg swap 1 - [a,b] ] } cond [a,b]
}
{
[ "alien-unsigned-" ?head ]
[ string>number 8 * 2^ 1 - 0 swap [a,b] ]
}
} cond
[ fits-in-fixnum? fixnum integer ? ] keep <class/interval-info> [ fits-in-fixnum? fixnum integer ? ] keep <class/interval-info>
'[ 2drop _ ] "outputs" set-word-prop '[ 2drop _ ] "outputs" set-word-prop
] each ] each

View File

@ -181,9 +181,12 @@ HOOK: %broadcast-vector cpu ( dst src rep -- )
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
HOOK: %add-vector cpu ( dst src1 src2 rep -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector cpu ( dst src1 src2 rep -- ) HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- ) HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- ) HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- ) HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- ) HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- ) HOOK: %max-vector cpu ( dst src1 src2 rep -- )
@ -194,9 +197,12 @@ HOOK: %broadcast-vector-reps cpu ( -- reps )
HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps )
HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps )
HOOK: %add-vector-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps )
HOOK: %sub-vector-reps cpu ( -- reps ) HOOK: %saturated-add-vector-reps cpu ( -- reps )
HOOK: %add-sub-vector-reps cpu ( -- reps ) HOOK: %add-sub-vector-reps cpu ( -- reps )
HOOK: %sub-vector-reps cpu ( -- reps )
HOOK: %saturated-sub-vector-reps cpu ( -- reps )
HOOK: %mul-vector-reps cpu ( -- reps ) HOOK: %mul-vector-reps cpu ( -- reps )
HOOK: %saturated-mul-vector-reps cpu ( -- reps )
HOOK: %div-vector-reps cpu ( -- reps ) HOOK: %div-vector-reps cpu ( -- reps )
HOOK: %min-vector-reps cpu ( -- reps ) HOOK: %min-vector-reps cpu ( -- reps )
HOOK: %max-vector-reps cpu ( -- reps ) HOOK: %max-vector-reps cpu ( -- reps )

View File

@ -323,6 +323,30 @@ M: x86 %add-vector-reps
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ; } available-reps ;
M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
{
{ char-16-rep [ PADDSB ] }
{ uchar-16-rep [ PADDUSB ] }
{ short-8-rep [ PADDSW ] }
{ ushort-8-rep [ PADDUSW ] }
} case drop ;
M: x86 %saturated-add-vector-reps
{
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ ADDSUBPS ] }
{ double-2-rep [ ADDSUBPD ] }
} case drop ;
M: x86 %add-sub-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
} available-reps ;
M: x86 %sub-vector ( dst src1 src2 rep -- ) M: x86 %sub-vector ( dst src1 src2 rep -- )
{ {
{ float-4-rep [ SUBPS ] } { float-4-rep [ SUBPS ] }
@ -341,15 +365,17 @@ M: x86 %sub-vector-reps
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ; } available-reps ;
M: x86 %add-sub-vector ( dst src1 src2 rep -- ) M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
{ {
{ float-4-rep [ ADDSUBPS ] } { char-16-rep [ PSUBSB ] }
{ double-2-rep [ ADDSUBPD ] } { uchar-16-rep [ PSUBUSB ] }
{ short-8-rep [ PSUBSW ] }
{ ushort-8-rep [ PSUBUSW ] }
} case drop ; } case drop ;
M: x86 %add-sub-vector-reps M: x86 %saturated-sub-vector-reps
{ {
{ sse3? { float-4-rep double-2-rep } } { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
} available-reps ; } available-reps ;
M: x86 %mul-vector ( dst src1 src2 rep -- ) M: x86 %mul-vector ( dst src1 src2 rep -- )
@ -368,6 +394,10 @@ M: x86 %mul-vector-reps
{ sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ; } available-reps ;
M: x86 %saturated-mul-vector-reps
! No multiplication with saturation on x86
{ } ;
M: x86 %div-vector ( dst src1 src2 rep -- ) M: x86 %div-vector ( dst src1 src2 rep -- )
{ {
{ float-4-rep [ DIVPS ] } { float-4-rep [ DIVPS ] }
@ -854,46 +884,29 @@ M: x86 small-enough? ( n -- ? )
#! set up by the caller. #! set up by the caller.
stack-frame get total-size>> + stack@ ; stack-frame get total-size>> + stack@ ;
: enable-sse2 ( -- ) :: install-sse2-check ( -- )
enable-float-intrinsics
enable-fsqrt
enable-float-min/max
enable-sse2-simd ;
: enable-sse3 ( -- )
enable-sse2
enable-sse3-simd ;
enable-min/max
:: install-sse-check ( version -- )
[ [
sse-version version < [ sse-version 20 < [
"This image was built to use " write "This image was built to use SSE2 but your CPU does not support it." print
version sse-string write
" but your CPU only supports " write
sse-version sse-string write "." print
"You will need to bootstrap Factor again." print "You will need to bootstrap Factor again." print
flush flush
1 exit 1 exit
] when ] when
] "cpu.x86" add-init-hook ; ] "cpu.x86" add-init-hook ;
: enable-sse ( version -- ) : enable-sse2 ( version -- )
{ 20 >= [
{ 00 [ ] } enable-float-intrinsics
{ 10 [ ] } enable-fsqrt
{ 20 [ enable-sse2 ] } enable-float-min/max
{ 30 [ enable-sse3 ] } install-sse2-check
{ 33 [ enable-sse3 ] } ] when ;
{ 41 [ enable-sse3 ] }
{ 42 [ enable-sse3 ] }
} case ; enable-simd
enable-min/max
: check-sse ( -- ) : check-sse ( -- )
[ { sse_version } compile ] with-optimizer [ { sse_version } compile ] with-optimizer
"Checking for multimedia extensions: " write sse-version
"Checking for multimedia extensions: " write sse-version 30 min [ sse-string write " detected" print ] [ enable-sse2 ] bi ;
[ sse-string write " detected" print ]
[ install-sse-check ]
[ enable-sse ] tri ;

View File

@ -41,9 +41,12 @@ MACRO: simd-boa ( rep class -- simd-array )
[ [
{ {
{ v+ (simd-v+) } { v+ (simd-v+) }
{ v- (simd-v-) } { vs+ (simd-vs+) }
{ v+- (simd-v+-) } { v+- (simd-v+-) }
{ v- (simd-v-) }
{ vs- (simd-vs-) }
{ v* (simd-v*) } { v* (simd-v*) }
{ vs* (simd-vs*) }
{ v/ (simd-v/) } { v/ (simd-v/) }
{ vmin (simd-vmin) } { vmin (simd-vmin) }
{ vmax (simd-vmax) } { vmax (simd-vmax) }
@ -111,7 +114,7 @@ A{ DEFINES ${A}{
NTH [ T dup c-type-getter-boxer array-accessor ] NTH [ T dup c-type-getter-boxer array-accessor ]
SET-NTH [ T dup c-setter array-accessor ] SET-NTH [ T dup c-setter array-accessor ]
A-rep IS ${A}-rep A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
@ -142,6 +145,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
M: A byte-length underlying>> length ; inline M: A byte-length underlying>> length ; inline
M: A element-type drop A-rep rep-component-type ;
M: A pprint-delims drop \ A{ \ } ; M: A pprint-delims drop \ A{ \ } ;
M: A >pprint-sequence ; M: A >pprint-sequence ;
@ -172,9 +177,12 @@ INSTANCE: A sequence
\ A \ A-with \ A-rep H{ \ A \ A-with \ A-rep H{
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] } { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
@ -227,7 +235,7 @@ A{ DEFINES ${A}{
A-deref DEFINES-PRIVATE ${A}-deref A-deref DEFINES-PRIVATE ${A}-deref
A-rep IS ${A/2}-rep A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
@ -267,6 +275,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
M: A byte-length drop 32 ; inline M: A byte-length drop 32 ; inline
M: A element-type drop A-rep rep-component-type ;
SYNTAX: A{ \ } [ >A ] parse-literal ; SYNTAX: A{ \ } [ >A ] parse-literal ;
M: A pprint-delims drop \ A{ \ } ; M: A pprint-delims drop \ A{ \ } ;
@ -298,9 +308,12 @@ INSTANCE: A sequence
\ A \ A-with \ A-rep H{ \ A \ A-with \ A-rep H{
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] } { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }

View File

@ -8,8 +8,11 @@ IN: math.vectors.simd.intrinsics
ERROR: bad-simd-call ; ERROR: bad-simd-call ;
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vs+) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vs-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vs*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
@ -68,9 +71,12 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
M: vector-rep supported-simd-op? M: vector-rep supported-simd-op?
{ {
{ \ (simd-v+) [ %add-vector-reps ] } { \ (simd-v+) [ %add-vector-reps ] }
{ \ (simd-vs+) [ %saturated-add-vector-reps ] }
{ \ (simd-v+-) [ %add-sub-vector-reps ] } { \ (simd-v+-) [ %add-sub-vector-reps ] }
{ \ (simd-v-) [ %sub-vector-reps ] } { \ (simd-v-) [ %sub-vector-reps ] }
{ \ (simd-vs-) [ %saturated-sub-vector-reps ] }
{ \ (simd-v*) [ %mul-vector-reps ] } { \ (simd-v*) [ %mul-vector-reps ] }
{ \ (simd-vs*) [ %saturated-mul-vector-reps ] }
{ \ (simd-v/) [ %div-vector-reps ] } { \ (simd-v/) [ %div-vector-reps ] }
{ \ (simd-vmin) [ %min-vector-reps ] } { \ (simd-vmin) [ %min-vector-reps ] }
{ \ (simd-vmax) [ %max-vector-reps ] } { \ (simd-vmax) [ %max-vector-reps ] }

View File

@ -161,8 +161,12 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
$nl $nl
"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "." "It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
{ $subsection (simd-v+) } { $subsection (simd-v+) }
{ $subsection (simd-v-) } { $subsection (simd-vs+) }
{ $subsection (simd-v+-) } { $subsection (simd-v+-) }
{ $subsection (simd-v-) }
{ $subsection (simd-vs-) }
{ $subsection (simd-v*) }
{ $subsection (simd-vs*) }
{ $subsection (simd-v/) } { $subsection (simd-v/) }
{ $subsection (simd-vmin) } { $subsection (simd-vmin) }
{ $subsection (simd-vmax) } { $subsection (simd-vmax) }

View File

@ -53,11 +53,14 @@ H{
{ norm-sq { +vector+ -> +nonnegative+ } } { norm-sq { +vector+ -> +nonnegative+ } }
{ normalize { +vector+ -> +vector+ } } { normalize { +vector+ -> +vector+ } }
{ v* { +vector+ +vector+ -> +vector+ } } { v* { +vector+ +vector+ -> +vector+ } }
{ vs* { +vector+ +vector+ -> +vector+ } }
{ v*n { +vector+ +scalar+ -> +vector+ } } { v*n { +vector+ +scalar+ -> +vector+ } }
{ v+ { +vector+ +vector+ -> +vector+ } } { v+ { +vector+ +vector+ -> +vector+ } }
{ vs+ { +vector+ +vector+ -> +vector+ } }
{ v+- { +vector+ +vector+ -> +vector+ } } { v+- { +vector+ +vector+ -> +vector+ } }
{ v+n { +vector+ +scalar+ -> +vector+ } } { v+n { +vector+ +scalar+ -> +vector+ } }
{ v- { +vector+ +vector+ -> +vector+ } } { v- { +vector+ +vector+ -> +vector+ } }
{ vs- { +vector+ +vector+ -> +vector+ } }
{ v-n { +vector+ +scalar+ -> +vector+ } } { v-n { +vector+ +scalar+ -> +vector+ } }
{ v. { +vector+ +vector+ -> +scalar+ } } { v. { +vector+ +vector+ -> +scalar+ } }
{ v/ { +vector+ +vector+ -> +vector+ } } { v/ { +vector+ +vector+ -> +vector+ } }

View File

@ -26,7 +26,11 @@ $nl
{ $subsection v. } { $subsection v. }
{ $subsection norm } { $subsection norm }
{ $subsection norm-sq } { $subsection norm-sq }
{ $subsection normalize } ; { $subsection normalize }
"Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:"
{ $subsection vs+ }
{ $subsection vs- }
{ $subsection vs* } ;
ABOUT: "math-vectors" ABOUT: "math-vectors"
@ -100,6 +104,34 @@ HELP: v.
{ $snippet "0 [ conjugate * + ] 2reduce" } { $snippet "0 [ conjugate * + ] 2reduce" }
} ; } ;
HELP: vs+
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Adds " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." }
{ $examples
"With saturation:"
{ $example
"USING: math.vectors prettyprint specialized-arrays ;"
"SPECIALIZED-ARRAY: uchar"
"uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } vs+ ."
"uchar-array{ 170 255 220 }"
}
"Without saturation:"
{ $example
"USING: math.vectors prettyprint specialized-arrays ;"
"SPECIALIZED-ARRAY: uchar"
"uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } v+ ."
"uchar-array{ 170 14 220 }"
}
} ;
HELP: vs-
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise with saturation." } ;
HELP: vs*
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Multiplies " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } ;
HELP: norm-sq HELP: norm-sq
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } } { $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
{ $description "Computes the squared length of a mathematical vector." } ; { $description "Computes the squared length of a mathematical vector." } ;
@ -120,3 +152,5 @@ HELP: set-axis
{ 2map v+ v- v* v/ } related-words { 2map v+ v- v* v/ } related-words
{ 2reduce v. } related-words { 2reduce v. } related-words
{ vs+ vs- vs* } related-words

View File

@ -1,9 +1,11 @@
! Copyright (C) 2005, 2008 Slava Pestov. ! Copyright (C) 2005, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: arrays kernel sequences math math.functions hints USING: arrays alien.c-types kernel sequences math math.functions
math.order ; hints math.order fry ;
IN: math.vectors IN: math.vectors
GENERIC: element-type ( obj -- c-type )
: vneg ( u -- v ) [ neg ] map ; : vneg ( u -- v ) [ neg ] map ;
: v+n ( u n -- v ) [ + ] curry map ; : v+n ( u n -- v ) [ + ] curry map ;
@ -29,6 +31,13 @@ IN: math.vectors
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map [ [ not ] 2dip pick [ + ] [ - ] if ] 2map
nip ; nip ;
: 2saturate-map ( u v quot -- w )
pick element-type '[ @ _ c-type-clamp ] 2map ; inline
: vs+ ( u v -- w ) [ + ] 2saturate-map ;
: vs- ( u v -- w ) [ - ] 2saturate-map ;
: vs* ( u v -- w ) [ * ] 2saturate-map ;
: vfloor ( v -- _v_ ) [ floor ] map ; : vfloor ( v -- _v_ ) [ floor ] map ;
: vceiling ( v -- ^v^ ) [ ceiling ] map ; : vceiling ( v -- ^v^ ) [ ceiling ] map ;
: vtruncate ( v -- -v- ) [ truncate ] map ; : vtruncate ( v -- -v- ) [ truncate ] map ;

View File

@ -4,7 +4,7 @@ specialized-arrays.private sequences alien.c-types accessors
kernel arrays combinators compiler compiler.units classes.struct kernel arrays combinators compiler compiler.units classes.struct
combinators.smart compiler.tree.debugger math libc destructors combinators.smart compiler.tree.debugger math libc destructors
sequences.private multiline eval words vocabs namespaces sequences.private multiline eval words vocabs namespaces
assocs prettyprint alien.data ; assocs prettyprint alien.data math.vectors ;
FROM: alien.c-types => float ; FROM: alien.c-types => float ;
SPECIALIZED-ARRAY: int SPECIALIZED-ARRAY: int
@ -13,6 +13,9 @@ SPECIALIZED-ARRAY: ushort
SPECIALIZED-ARRAY: char SPECIALIZED-ARRAY: char
SPECIALIZED-ARRAY: uint SPECIALIZED-ARRAY: uint
SPECIALIZED-ARRAY: float SPECIALIZED-ARRAY: float
SPECIALIZED-ARRAY: ulonglong
[ ulonglong ] [ ulonglong-array{ } element-type ] unit-test
[ t ] [ { 1 2 3 } >int-array int-array? ] unit-test [ t ] [ { 1 2 3 } >int-array int-array? ] unit-test

View File

@ -1,10 +1,11 @@
! Copyright (C) 2008, 2009 Slava Pestov. ! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien alien.c-types alien.data alien.parser assocs USING: accessors alien alien.c-types alien.data alien.parser
byte-arrays classes compiler.units functors kernel lexer libc math assocs byte-arrays classes compiler.units functors kernel lexer
math.vectors.specialization namespaces parser prettyprint.custom libc math math.vectors math.vectors.specialization namespaces
sequences sequences.private strings summary vocabs vocabs.loader parser prettyprint.custom sequences sequences.private strings
vocabs.parser vocabs.generated words fry combinators ; summary vocabs vocabs.loader vocabs.parser vocabs.generated
words fry combinators present ;
IN: specialized-arrays IN: specialized-arrays
MIXIN: specialized-array MIXIN: specialized-array
@ -53,14 +54,14 @@ TUPLE: A
: <direct-A> ( alien len -- specialized-array ) A boa ; inline : <direct-A> ( alien len -- specialized-array ) A boa ; inline
: <A> ( n -- specialized-array ) [ T <underlying> ] keep <direct-A> ; inline : <A> ( n -- specialized-array ) [ \ T <underlying> ] keep <direct-A> ; inline
: (A) ( n -- specialized-array ) [ T (underlying) ] keep <direct-A> ; inline : (A) ( n -- specialized-array ) [ \ T (underlying) ] keep <direct-A> ; inline
: malloc-A ( len -- specialized-array ) [ T heap-size calloc ] keep <direct-A> ; inline : malloc-A ( len -- specialized-array ) [ \ T heap-size calloc ] keep <direct-A> ; inline
: byte-array>A ( byte-array -- specialized-array ) : byte-array>A ( byte-array -- specialized-array )
dup length T heap-size /mod 0 = [ drop T bad-byte-array-length ] unless dup length \ T heap-size /mod 0 = [ drop \ T bad-byte-array-length ] unless
<direct-A> ; inline <direct-A> ; inline
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
@ -81,12 +82,14 @@ M: A equal? over A instance? [ sequence= ] [ 2drop f ] if ;
M: A resize M: A resize
[ [
[ T heap-size * ] [ underlying>> ] bi* [ \ T heap-size * ] [ underlying>> ] bi*
resize-byte-array resize-byte-array
] [ drop ] 2bi ] [ drop ] 2bi
<direct-A> ; inline <direct-A> ; inline
M: A byte-length length T heap-size * ; inline M: A byte-length length \ T heap-size * ; inline
M: A element-type drop \ T ; inline
M: A direct-array-syntax drop \ A@ ; M: A direct-array-syntax drop \ A@ ;
@ -116,15 +119,15 @@ M: word (underlying-type) "c-type" word-prop ;
} cond ; } cond ;
: underlying-type-name ( c-type -- name ) : underlying-type-name ( c-type -- name )
underlying-type dup word? [ name>> ] when ; underlying-type present ;
: specialized-array-vocab ( c-type -- vocab ) : specialized-array-vocab ( c-type -- vocab )
"specialized-arrays.instances." prepend ; present "specialized-arrays.instances." prepend ;
PRIVATE> PRIVATE>
: define-array-vocab ( type -- vocab ) : define-array-vocab ( type -- vocab )
underlying-type-name underlying-type
[ specialized-array-vocab ] [ '[ _ define-array ] ] bi [ specialized-array-vocab ] [ '[ _ define-array ] ] bi
generate-vocab ; generate-vocab ;

View File

@ -77,12 +77,12 @@ DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot)):
DEF(bool,sse_version,(void)): DEF(bool,sse_version,(void)):
mov $0x1,RETURN_REG mov $0x1,RETURN_REG
cpuid cpuid
/* test $0x100000,%ecx test $0x100000,%ecx
jnz sse_42 jnz sse_42
test $0x80000,%ecx test $0x80000,%ecx
jnz sse_41 jnz sse_41
test $0x200,%ecx test $0x200,%ecx
jnz ssse_3 */ jnz ssse_3
test $0x1,%ecx test $0x1,%ecx
jnz sse_3 jnz sse_3
test $0x4000000,%edx test $0x4000000,%edx