math.vectors.simd: add saturated arithmetic operations

db4
Slava Pestov 2009-09-20 23:16:02 -05:00
parent acea55c692
commit ea44ea3522
18 changed files with 220 additions and 89 deletions

View File

@ -1,5 +1,6 @@
USING: alien alien.syntax alien.c-types kernel tools.test
sequences system libc alien.strings io.encodings.utf8 ;
sequences system libc alien.strings io.encodings.utf8
math.constants ;
IN: alien.c-types.tests
CONSTANT: xyz 123
@ -52,3 +53,9 @@ TYPEDEF: uchar* MyLPBYTE
os windows? cpu x86.64? and [
[ -2147467259 ] [ 2147500037 <long> *long ] unit-test
] when
[ 0 ] [ -10 uchar c-type-clamp ] unit-test
[ 12 ] [ 12 uchar c-type-clamp ] unit-test
[ -10 ] [ -10 char c-type-clamp ] unit-test
[ 127 ] [ 230 char c-type-clamp ] unit-test
[ t ] [ pi dup float c-type-clamp = ] unit-test

View File

@ -1,11 +1,12 @@
! Copyright (C) 2004, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: byte-arrays arrays assocs kernel kernel.private math
namespaces make parser sequences strings words splitting math.parser
cpu.architecture alien alien.accessors alien.strings quotations
layouts system compiler.units io io.files io.encodings.binary
io.streams.memory accessors combinators effects continuations fry
classes vocabs vocabs.loader words.symbol ;
math.order math.parser namespaces make parser sequences strings
words splitting cpu.architecture alien alien.accessors
alien.strings quotations layouts system compiler.units io
io.files io.encodings.binary io.streams.memory accessors
combinators effects continuations fry classes vocabs
vocabs.loader words.symbol ;
QUALIFIED: math
IN: alien.c-types
@ -480,3 +481,17 @@ M: int-4-rep rep-component-type drop int ;
M: uint-4-rep rep-component-type drop uint ;
M: float-4-rep rep-component-type drop float ;
M: double-2-rep rep-component-type drop double ;
: (unsigned-interval) ( bytes -- from to ) [ 0 ] dip 8 * 2^ 1 - ; foldable
: unsigned-interval ( c-type -- from to ) heap-size (unsigned-interval) ; foldable
: (signed-interval) ( bytes -- from to ) 8 * 1 - 2^ [ neg ] [ 1 - ] bi ; foldable
: signed-interval ( c-type -- from to ) heap-size (signed-interval) ; foldable
: c-type-interval ( c-type -- from to )
{
{ [ dup { float double } memq? ] [ drop -1/0. 1/0. ] }
{ [ dup { char short int long longlong } memq? ] [ signed-interval ] }
{ [ dup { uchar ushort uint ulong ulonglong } memq? ] [ unsigned-interval ] }
} cond ; foldable
: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline

View File

@ -305,7 +305,7 @@ def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##sub-vector
PURE-INSN: ##saturated-add-vector
def: dst
use: src1 src2
literal: rep ;
@ -315,11 +315,26 @@ def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##sub-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##saturated-sub-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##mul-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##saturated-mul-vector
def: dst
use: src1 src2
literal: rep ;
PURE-INSN: ##div-vector
def: dst
use: src1 src2

View File

@ -151,13 +151,16 @@ IN: compiler.cfg.intrinsics
{ math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] }
} enable-intrinsics ;
: enable-sse2-simd ( -- )
: enable-simd ( -- )
{
{ math.vectors.simd.intrinsics:assert-positive [ drop ] }
{ math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] }
@ -165,14 +168,10 @@ IN: compiler.cfg.intrinsics
{ math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
{ math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] }
} enable-intrinsics ;
: enable-sse3-simd ( -- )
{
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
} enable-intrinsics ;
: emit-intrinsic ( node word -- )
"intrinsic" word-prop call( node -- ) ;

View File

@ -47,9 +47,12 @@ UNION: two-operand-insn
##min-float
##max-float
##add-vector
##sub-vector
##saturated-add-vector
##add-sub-vector
##sub-vector
##saturated-sub-vector
##mul-vector
##saturated-mul-vector
##div-vector
##min-vector
##max-vector ;

View File

@ -169,9 +169,12 @@ CODEGEN: ##gather-vector-2 %gather-vector-2
CODEGEN: ##gather-vector-4 %gather-vector-4
CODEGEN: ##box-vector %box-vector
CODEGEN: ##add-vector %add-vector
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##saturated-add-vector %saturated-add-vector
CODEGEN: ##add-sub-vector %add-sub-vector
CODEGEN: ##sub-vector %sub-vector
CODEGEN: ##saturated-sub-vector %saturated-sub-vector
CODEGEN: ##mul-vector %mul-vector
CODEGEN: ##saturated-mul-vector %saturated-mul-vector
CODEGEN: ##div-vector %div-vector
CODEGEN: ##min-vector %min-vector
CODEGEN: ##max-vector %max-vector

View File

@ -18,6 +18,7 @@ compiler.tree.propagation.constraints
compiler.tree.propagation.call-effect
compiler.tree.propagation.transforms
compiler.tree.propagation.simd ;
FROM: alien.c-types => (signed-interval) (unsigned-interval) ;
IN: compiler.tree.propagation.known-words
{ + - * / }
@ -260,15 +261,9 @@ generic-comparison-ops [
alien-unsigned-8
} [
dup name>> {
{
[ "alien-signed-" ?head ]
[ string>number 8 * 1 - 2^ dup neg swap 1 - [a,b] ]
}
{
[ "alien-unsigned-" ?head ]
[ string>number 8 * 2^ 1 - 0 swap [a,b] ]
}
} cond
{ [ "alien-signed-" ?head ] [ string>number (signed-interval) ] }
{ [ "alien-unsigned-" ?head ] [ string>number (unsigned-interval) ] }
} cond [a,b]
[ fits-in-fixnum? fixnum integer ? ] keep <class/interval-info>
'[ 2drop _ ] "outputs" set-word-prop
] each

View File

@ -181,9 +181,12 @@ HOOK: %broadcast-vector cpu ( dst src rep -- )
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- )
HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- )
HOOK: %mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- )
HOOK: %div-vector cpu ( dst src1 src2 rep -- )
HOOK: %min-vector cpu ( dst src1 src2 rep -- )
HOOK: %max-vector cpu ( dst src1 src2 rep -- )
@ -194,9 +197,12 @@ HOOK: %broadcast-vector-reps cpu ( -- reps )
HOOK: %gather-vector-2-reps cpu ( -- reps )
HOOK: %gather-vector-4-reps cpu ( -- reps )
HOOK: %add-vector-reps cpu ( -- reps )
HOOK: %sub-vector-reps cpu ( -- reps )
HOOK: %saturated-add-vector-reps cpu ( -- reps )
HOOK: %add-sub-vector-reps cpu ( -- reps )
HOOK: %sub-vector-reps cpu ( -- reps )
HOOK: %saturated-sub-vector-reps cpu ( -- reps )
HOOK: %mul-vector-reps cpu ( -- reps )
HOOK: %saturated-mul-vector-reps cpu ( -- reps )
HOOK: %div-vector-reps cpu ( -- reps )
HOOK: %min-vector-reps cpu ( -- reps )
HOOK: %max-vector-reps cpu ( -- reps )

View File

@ -323,6 +323,30 @@ M: x86 %add-vector-reps
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
{
{ char-16-rep [ PADDSB ] }
{ uchar-16-rep [ PADDUSB ] }
{ short-8-rep [ PADDSW ] }
{ ushort-8-rep [ PADDUSW ] }
} case drop ;
M: x86 %saturated-add-vector-reps
{
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ ADDSUBPS ] }
{ double-2-rep [ ADDSUBPD ] }
} case drop ;
M: x86 %add-sub-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
} available-reps ;
M: x86 %sub-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ SUBPS ] }
@ -341,15 +365,17 @@ M: x86 %sub-vector-reps
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ ADDSUBPS ] }
{ double-2-rep [ ADDSUBPD ] }
{ char-16-rep [ PSUBSB ] }
{ uchar-16-rep [ PSUBUSB ] }
{ short-8-rep [ PSUBSW ] }
{ ushort-8-rep [ PSUBUSW ] }
} case drop ;
M: x86 %add-sub-vector-reps
M: x86 %saturated-sub-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %mul-vector ( dst src1 src2 rep -- )
@ -368,6 +394,10 @@ M: x86 %mul-vector-reps
{ sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %saturated-mul-vector-reps
! No multiplication with saturation on x86
{ } ;
M: x86 %div-vector ( dst src1 src2 rep -- )
{
{ float-4-rep [ DIVPS ] }
@ -854,46 +884,29 @@ M: x86 small-enough? ( n -- ? )
#! set up by the caller.
stack-frame get total-size>> + stack@ ;
: enable-sse2 ( -- )
enable-float-intrinsics
enable-fsqrt
enable-float-min/max
enable-sse2-simd ;
: enable-sse3 ( -- )
enable-sse2
enable-sse3-simd ;
enable-min/max
:: install-sse-check ( version -- )
:: install-sse2-check ( -- )
[
sse-version version < [
"This image was built to use " write
version sse-string write
" but your CPU only supports " write
sse-version sse-string write "." print
sse-version 20 < [
"This image was built to use SSE2 but your CPU does not support it." print
"You will need to bootstrap Factor again." print
flush
1 exit
] when
] "cpu.x86" add-init-hook ;
: enable-sse ( version -- )
{
{ 00 [ ] }
{ 10 [ ] }
{ 20 [ enable-sse2 ] }
{ 30 [ enable-sse3 ] }
{ 33 [ enable-sse3 ] }
{ 41 [ enable-sse3 ] }
{ 42 [ enable-sse3 ] }
} case ;
: enable-sse2 ( version -- )
20 >= [
enable-float-intrinsics
enable-fsqrt
enable-float-min/max
install-sse2-check
] when ;
enable-simd
enable-min/max
: check-sse ( -- )
[ { sse_version } compile ] with-optimizer
"Checking for multimedia extensions: " write sse-version 30 min
[ sse-string write " detected" print ]
[ install-sse-check ]
[ enable-sse ] tri ;
"Checking for multimedia extensions: " write sse-version
[ sse-string write " detected" print ] [ enable-sse2 ] bi ;

View File

@ -41,9 +41,12 @@ MACRO: simd-boa ( rep class -- simd-array )
[
{
{ v+ (simd-v+) }
{ v- (simd-v-) }
{ vs+ (simd-vs+) }
{ v+- (simd-v+-) }
{ v- (simd-v-) }
{ vs- (simd-vs-) }
{ v* (simd-v*) }
{ vs* (simd-vs*) }
{ v/ (simd-v/) }
{ vmin (simd-vmin) }
{ vmax (simd-vmax) }
@ -111,7 +114,7 @@ A{ DEFINES ${A}{
NTH [ T dup c-type-getter-boxer array-accessor ]
SET-NTH [ T dup c-setter array-accessor ]
A-rep IS ${A}-rep
A-rep [ A name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
@ -142,6 +145,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
M: A byte-length underlying>> length ; inline
M: A element-type drop A-rep rep-component-type ;
M: A pprint-delims drop \ A{ \ } ;
M: A >pprint-sequence ;
@ -172,9 +177,12 @@ INSTANCE: A sequence
\ A \ A-with \ A-rep H{
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }
@ -227,7 +235,7 @@ A{ DEFINES ${A}{
A-deref DEFINES-PRIVATE ${A}-deref
A-rep IS ${A/2}-rep
A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ]
A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op
A-v->n-op DEFINES-PRIVATE ${A}-v->n-op
@ -267,6 +275,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ;
M: A byte-length drop 32 ; inline
M: A element-type drop A-rep rep-component-type ;
SYNTAX: A{ \ } [ >A ] parse-literal ;
M: A pprint-delims drop \ A{ \ } ;
@ -298,9 +308,12 @@ INSTANCE: A sequence
\ A \ A-with \ A-rep H{
{ v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] }
{ vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] }
{ v- [ [ (simd-v-) ] \ A-vv->v-op execute ] }
{ vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] }
{ v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] }
{ v* [ [ (simd-v*) ] \ A-vv->v-op execute ] }
{ vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] }
{ v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] }
{ vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] }
{ vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] }

View File

@ -8,8 +8,11 @@ IN: math.vectors.simd.intrinsics
ERROR: bad-simd-call ;
: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vs+) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vs-) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vs*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ;
: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ;
@ -68,9 +71,12 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
M: vector-rep supported-simd-op?
{
{ \ (simd-v+) [ %add-vector-reps ] }
{ \ (simd-vs+) [ %saturated-add-vector-reps ] }
{ \ (simd-v+-) [ %add-sub-vector-reps ] }
{ \ (simd-v-) [ %sub-vector-reps ] }
{ \ (simd-vs-) [ %saturated-sub-vector-reps ] }
{ \ (simd-v*) [ %mul-vector-reps ] }
{ \ (simd-vs*) [ %saturated-mul-vector-reps ] }
{ \ (simd-v/) [ %div-vector-reps ] }
{ \ (simd-vmin) [ %min-vector-reps ] }
{ \ (simd-vmax) [ %max-vector-reps ] }

View File

@ -161,8 +161,12 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
$nl
"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "."
{ $subsection (simd-v+) }
{ $subsection (simd-v-) }
{ $subsection (simd-vs+) }
{ $subsection (simd-v+-) }
{ $subsection (simd-v-) }
{ $subsection (simd-vs-) }
{ $subsection (simd-v*) }
{ $subsection (simd-vs*) }
{ $subsection (simd-v/) }
{ $subsection (simd-vmin) }
{ $subsection (simd-vmax) }

View File

@ -53,11 +53,14 @@ H{
{ norm-sq { +vector+ -> +nonnegative+ } }
{ normalize { +vector+ -> +vector+ } }
{ v* { +vector+ +vector+ -> +vector+ } }
{ vs* { +vector+ +vector+ -> +vector+ } }
{ v*n { +vector+ +scalar+ -> +vector+ } }
{ v+ { +vector+ +vector+ -> +vector+ } }
{ vs+ { +vector+ +vector+ -> +vector+ } }
{ v+- { +vector+ +vector+ -> +vector+ } }
{ v+n { +vector+ +scalar+ -> +vector+ } }
{ v- { +vector+ +vector+ -> +vector+ } }
{ vs- { +vector+ +vector+ -> +vector+ } }
{ v-n { +vector+ +scalar+ -> +vector+ } }
{ v. { +vector+ +vector+ -> +scalar+ } }
{ v/ { +vector+ +vector+ -> +vector+ } }

View File

@ -26,7 +26,11 @@ $nl
{ $subsection v. }
{ $subsection norm }
{ $subsection norm-sq }
{ $subsection normalize } ;
{ $subsection normalize }
"Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:"
{ $subsection vs+ }
{ $subsection vs- }
{ $subsection vs* } ;
ABOUT: "math-vectors"
@ -100,6 +104,34 @@ HELP: v.
{ $snippet "0 [ conjugate * + ] 2reduce" }
} ;
HELP: vs+
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Adds " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." }
{ $examples
"With saturation:"
{ $example
"USING: math.vectors prettyprint specialized-arrays ;"
"SPECIALIZED-ARRAY: uchar"
"uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } vs+ ."
"uchar-array{ 170 255 220 }"
}
"Without saturation:"
{ $example
"USING: math.vectors prettyprint specialized-arrays ;"
"SPECIALIZED-ARRAY: uchar"
"uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } v+ ."
"uchar-array{ 170 14 220 }"
}
} ;
HELP: vs-
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise with saturation." } ;
HELP: vs*
{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } }
{ $description "Multiplies " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } ;
HELP: norm-sq
{ $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } }
{ $description "Computes the squared length of a mathematical vector." } ;
@ -120,3 +152,5 @@ HELP: set-axis
{ 2map v+ v- v* v/ } related-words
{ 2reduce v. } related-words
{ vs+ vs- vs* } related-words

View File

@ -1,9 +1,11 @@
! Copyright (C) 2005, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: arrays kernel sequences math math.functions hints
math.order ;
USING: arrays alien.c-types kernel sequences math math.functions
hints math.order fry ;
IN: math.vectors
GENERIC: element-type ( obj -- c-type )
: vneg ( u -- v ) [ neg ] map ;
: v+n ( u n -- v ) [ + ] curry map ;
@ -29,6 +31,13 @@ IN: math.vectors
[ [ not ] 2dip pick [ + ] [ - ] if ] 2map
nip ;
: 2saturate-map ( u v quot -- w )
pick element-type '[ @ _ c-type-clamp ] 2map ; inline
: vs+ ( u v -- w ) [ + ] 2saturate-map ;
: vs- ( u v -- w ) [ - ] 2saturate-map ;
: vs* ( u v -- w ) [ * ] 2saturate-map ;
: vfloor ( v -- _v_ ) [ floor ] map ;
: vceiling ( v -- ^v^ ) [ ceiling ] map ;
: vtruncate ( v -- -v- ) [ truncate ] map ;

View File

@ -4,7 +4,7 @@ specialized-arrays.private sequences alien.c-types accessors
kernel arrays combinators compiler compiler.units classes.struct
combinators.smart compiler.tree.debugger math libc destructors
sequences.private multiline eval words vocabs namespaces
assocs prettyprint alien.data ;
assocs prettyprint alien.data math.vectors ;
FROM: alien.c-types => float ;
SPECIALIZED-ARRAY: int
@ -13,6 +13,9 @@ SPECIALIZED-ARRAY: ushort
SPECIALIZED-ARRAY: char
SPECIALIZED-ARRAY: uint
SPECIALIZED-ARRAY: float
SPECIALIZED-ARRAY: ulonglong
[ ulonglong ] [ ulonglong-array{ } element-type ] unit-test
[ t ] [ { 1 2 3 } >int-array int-array? ] unit-test

View File

@ -1,10 +1,11 @@
! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien alien.c-types alien.data alien.parser assocs
byte-arrays classes compiler.units functors kernel lexer libc math
math.vectors.specialization namespaces parser prettyprint.custom
sequences sequences.private strings summary vocabs vocabs.loader
vocabs.parser vocabs.generated words fry combinators ;
USING: accessors alien alien.c-types alien.data alien.parser
assocs byte-arrays classes compiler.units functors kernel lexer
libc math math.vectors math.vectors.specialization namespaces
parser prettyprint.custom sequences sequences.private strings
summary vocabs vocabs.loader vocabs.parser vocabs.generated
words fry combinators present ;
IN: specialized-arrays
MIXIN: specialized-array
@ -53,14 +54,14 @@ TUPLE: A
: <direct-A> ( alien len -- specialized-array ) A boa ; inline
: <A> ( n -- specialized-array ) [ T <underlying> ] keep <direct-A> ; inline
: <A> ( n -- specialized-array ) [ \ T <underlying> ] keep <direct-A> ; inline
: (A) ( n -- specialized-array ) [ T (underlying) ] keep <direct-A> ; inline
: (A) ( n -- specialized-array ) [ \ T (underlying) ] keep <direct-A> ; inline
: malloc-A ( len -- specialized-array ) [ T heap-size calloc ] keep <direct-A> ; inline
: malloc-A ( len -- specialized-array ) [ \ T heap-size calloc ] keep <direct-A> ; inline
: byte-array>A ( byte-array -- specialized-array )
dup length T heap-size /mod 0 = [ drop T bad-byte-array-length ] unless
dup length \ T heap-size /mod 0 = [ drop \ T bad-byte-array-length ] unless
<direct-A> ; inline
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
@ -81,12 +82,14 @@ M: A equal? over A instance? [ sequence= ] [ 2drop f ] if ;
M: A resize
[
[ T heap-size * ] [ underlying>> ] bi*
[ \ T heap-size * ] [ underlying>> ] bi*
resize-byte-array
] [ drop ] 2bi
<direct-A> ; inline
M: A byte-length length T heap-size * ; inline
M: A byte-length length \ T heap-size * ; inline
M: A element-type drop \ T ; inline
M: A direct-array-syntax drop \ A@ ;
@ -116,15 +119,15 @@ M: word (underlying-type) "c-type" word-prop ;
} cond ;
: underlying-type-name ( c-type -- name )
underlying-type dup word? [ name>> ] when ;
underlying-type present ;
: specialized-array-vocab ( c-type -- vocab )
"specialized-arrays.instances." prepend ;
present "specialized-arrays.instances." prepend ;
PRIVATE>
: define-array-vocab ( type -- vocab )
underlying-type-name
underlying-type
[ specialized-array-vocab ] [ '[ _ define-array ] ] bi
generate-vocab ;

View File

@ -77,12 +77,12 @@ DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot)):
DEF(bool,sse_version,(void)):
mov $0x1,RETURN_REG
cpuid
/* test $0x100000,%ecx
test $0x100000,%ecx
jnz sse_42
test $0x80000,%ecx
jnz sse_41
test $0x200,%ecx
jnz ssse_3 */
jnz ssse_3
test $0x1,%ecx
jnz sse_3
test $0x4000000,%edx