diff --git a/basis/alien/c-types/c-types-tests.factor b/basis/alien/c-types/c-types-tests.factor index 792e7d416a..a893ffebe8 100644 --- a/basis/alien/c-types/c-types-tests.factor +++ b/basis/alien/c-types/c-types-tests.factor @@ -1,5 +1,6 @@ USING: alien alien.syntax alien.c-types kernel tools.test -sequences system libc alien.strings io.encodings.utf8 ; +sequences system libc alien.strings io.encodings.utf8 +math.constants ; IN: alien.c-types.tests CONSTANT: xyz 123 @@ -52,3 +53,9 @@ TYPEDEF: uchar* MyLPBYTE os windows? cpu x86.64? and [ [ -2147467259 ] [ 2147500037 *long ] unit-test ] when + +[ 0 ] [ -10 uchar c-type-clamp ] unit-test +[ 12 ] [ 12 uchar c-type-clamp ] unit-test +[ -10 ] [ -10 char c-type-clamp ] unit-test +[ 127 ] [ 230 char c-type-clamp ] unit-test +[ t ] [ pi dup float c-type-clamp = ] unit-test diff --git a/basis/alien/c-types/c-types.factor b/basis/alien/c-types/c-types.factor index afbb664fed..1ad4f75a3c 100755 --- a/basis/alien/c-types/c-types.factor +++ b/basis/alien/c-types/c-types.factor @@ -1,11 +1,12 @@ ! Copyright (C) 2004, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: byte-arrays arrays assocs kernel kernel.private math -namespaces make parser sequences strings words splitting math.parser -cpu.architecture alien alien.accessors alien.strings quotations -layouts system compiler.units io io.files io.encodings.binary -io.streams.memory accessors combinators effects continuations fry -classes vocabs vocabs.loader words.symbol ; +math.order math.parser namespaces make parser sequences strings +words splitting cpu.architecture alien alien.accessors +alien.strings quotations layouts system compiler.units io +io.files io.encodings.binary io.streams.memory accessors +combinators effects continuations fry classes vocabs +vocabs.loader words.symbol ; QUALIFIED: math IN: alien.c-types @@ -480,3 +481,17 @@ M: int-4-rep rep-component-type drop int ; M: uint-4-rep rep-component-type drop uint ; M: float-4-rep rep-component-type drop float ; M: double-2-rep rep-component-type drop double ; + +: (unsigned-interval) ( bytes -- from to ) [ 0 ] dip 8 * 2^ 1 - ; foldable +: unsigned-interval ( c-type -- from to ) heap-size (unsigned-interval) ; foldable +: (signed-interval) ( bytes -- from to ) 8 * 1 - 2^ [ neg ] [ 1 - ] bi ; foldable +: signed-interval ( c-type -- from to ) heap-size (signed-interval) ; foldable + +: c-type-interval ( c-type -- from to ) + { + { [ dup { float double } memq? ] [ drop -1/0. 1/0. ] } + { [ dup { char short int long longlong } memq? ] [ signed-interval ] } + { [ dup { uchar ushort uint ulong ulonglong } memq? ] [ unsigned-interval ] } + } cond ; foldable + +: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 63297b9bdf..0be42a24ab 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -305,7 +305,7 @@ def: dst use: src1 src2 literal: rep ; -PURE-INSN: ##sub-vector +PURE-INSN: ##saturated-add-vector def: dst use: src1 src2 literal: rep ; @@ -315,11 +315,26 @@ def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##sub-vector +def: dst +use: src1 src2 +literal: rep ; + +PURE-INSN: ##saturated-sub-vector +def: dst +use: src1 src2 +literal: rep ; + PURE-INSN: ##mul-vector def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##saturated-mul-vector +def: dst +use: src1 src2 +literal: rep ; + PURE-INSN: ##div-vector def: dst use: src1 src2 diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 5b3fd1b324..b9835827fa 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -151,13 +151,16 @@ IN: compiler.cfg.intrinsics { math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] } } enable-intrinsics ; -: enable-sse2-simd ( -- ) +: enable-simd ( -- ) { { math.vectors.simd.intrinsics:assert-positive [ drop ] } { math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] } @@ -165,14 +168,10 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } + { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] } } enable-intrinsics ; -: enable-sse3-simd ( -- ) - { - { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } - } enable-intrinsics ; - : emit-intrinsic ( node word -- ) "intrinsic" word-prop call( node -- ) ; diff --git a/basis/compiler/cfg/two-operand/two-operand.factor b/basis/compiler/cfg/two-operand/two-operand.factor index c275756046..1186e6b41f 100644 --- a/basis/compiler/cfg/two-operand/two-operand.factor +++ b/basis/compiler/cfg/two-operand/two-operand.factor @@ -47,9 +47,12 @@ UNION: two-operand-insn ##min-float ##max-float ##add-vector - ##sub-vector + ##saturated-add-vector ##add-sub-vector + ##sub-vector + ##saturated-sub-vector ##mul-vector + ##saturated-mul-vector ##div-vector ##min-vector ##max-vector ; diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 14246a3fbf..06a16663cb 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -169,9 +169,12 @@ CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##box-vector %box-vector CODEGEN: ##add-vector %add-vector -CODEGEN: ##sub-vector %sub-vector +CODEGEN: ##saturated-add-vector %saturated-add-vector CODEGEN: ##add-sub-vector %add-sub-vector +CODEGEN: ##sub-vector %sub-vector +CODEGEN: ##saturated-sub-vector %saturated-sub-vector CODEGEN: ##mul-vector %mul-vector +CODEGEN: ##saturated-mul-vector %saturated-mul-vector CODEGEN: ##div-vector %div-vector CODEGEN: ##min-vector %min-vector CODEGEN: ##max-vector %max-vector diff --git a/basis/compiler/tree/propagation/known-words/known-words.factor b/basis/compiler/tree/propagation/known-words/known-words.factor index 621b8d082b..d4780b335b 100644 --- a/basis/compiler/tree/propagation/known-words/known-words.factor +++ b/basis/compiler/tree/propagation/known-words/known-words.factor @@ -18,6 +18,7 @@ compiler.tree.propagation.constraints compiler.tree.propagation.call-effect compiler.tree.propagation.transforms compiler.tree.propagation.simd ; +FROM: alien.c-types => (signed-interval) (unsigned-interval) ; IN: compiler.tree.propagation.known-words { + - * / } @@ -260,15 +261,9 @@ generic-comparison-ops [ alien-unsigned-8 } [ dup name>> { - { - [ "alien-signed-" ?head ] - [ string>number 8 * 1 - 2^ dup neg swap 1 - [a,b] ] - } - { - [ "alien-unsigned-" ?head ] - [ string>number 8 * 2^ 1 - 0 swap [a,b] ] - } - } cond + { [ "alien-signed-" ?head ] [ string>number (signed-interval) ] } + { [ "alien-unsigned-" ?head ] [ string>number (unsigned-interval) ] } + } cond [a,b] [ fits-in-fixnum? fixnum integer ? ] keep '[ 2drop _ ] "outputs" set-word-prop ] each diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 331d459adf..29e58cb163 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -181,9 +181,12 @@ HOOK: %broadcast-vector cpu ( dst src rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- ) -HOOK: %sub-vector cpu ( dst src1 src2 rep -- ) +HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- ) HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- ) +HOOK: %sub-vector cpu ( dst src1 src2 rep -- ) +HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- ) HOOK: %mul-vector cpu ( dst src1 src2 rep -- ) +HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- ) HOOK: %div-vector cpu ( dst src1 src2 rep -- ) HOOK: %min-vector cpu ( dst src1 src2 rep -- ) HOOK: %max-vector cpu ( dst src1 src2 rep -- ) @@ -194,9 +197,12 @@ HOOK: %broadcast-vector-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps ) -HOOK: %sub-vector-reps cpu ( -- reps ) +HOOK: %saturated-add-vector-reps cpu ( -- reps ) HOOK: %add-sub-vector-reps cpu ( -- reps ) +HOOK: %sub-vector-reps cpu ( -- reps ) +HOOK: %saturated-sub-vector-reps cpu ( -- reps ) HOOK: %mul-vector-reps cpu ( -- reps ) +HOOK: %saturated-mul-vector-reps cpu ( -- reps ) HOOK: %div-vector-reps cpu ( -- reps ) HOOK: %min-vector-reps cpu ( -- reps ) HOOK: %max-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index a132947cf1..62caf1ffe9 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -323,6 +323,30 @@ M: x86 %add-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; +M: x86 %saturated-add-vector ( dst src1 src2 rep -- ) + { + { char-16-rep [ PADDSB ] } + { uchar-16-rep [ PADDUSB ] } + { short-8-rep [ PADDSW ] } + { ushort-8-rep [ PADDUSW ] } + } case drop ; + +M: x86 %saturated-add-vector-reps + { + { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } } + } available-reps ; + +M: x86 %add-sub-vector ( dst src1 src2 rep -- ) + { + { float-4-rep [ ADDSUBPS ] } + { double-2-rep [ ADDSUBPD ] } + } case drop ; + +M: x86 %add-sub-vector-reps + { + { sse3? { float-4-rep double-2-rep } } + } available-reps ; + M: x86 %sub-vector ( dst src1 src2 rep -- ) { { float-4-rep [ SUBPS ] } @@ -341,15 +365,17 @@ M: x86 %sub-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; -M: x86 %add-sub-vector ( dst src1 src2 rep -- ) +M: x86 %saturated-sub-vector ( dst src1 src2 rep -- ) { - { float-4-rep [ ADDSUBPS ] } - { double-2-rep [ ADDSUBPD ] } + { char-16-rep [ PSUBSB ] } + { uchar-16-rep [ PSUBUSB ] } + { short-8-rep [ PSUBSW ] } + { ushort-8-rep [ PSUBUSW ] } } case drop ; -M: x86 %add-sub-vector-reps +M: x86 %saturated-sub-vector-reps { - { sse3? { float-4-rep double-2-rep } } + { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } } } available-reps ; M: x86 %mul-vector ( dst src1 src2 rep -- ) @@ -368,6 +394,10 @@ M: x86 %mul-vector-reps { sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; +M: x86 %saturated-mul-vector-reps + ! No multiplication with saturation on x86 + { } ; + M: x86 %div-vector ( dst src1 src2 rep -- ) { { float-4-rep [ DIVPS ] } @@ -854,46 +884,29 @@ M: x86 small-enough? ( n -- ? ) #! set up by the caller. stack-frame get total-size>> + stack@ ; -: enable-sse2 ( -- ) - enable-float-intrinsics - enable-fsqrt - enable-float-min/max - enable-sse2-simd ; - -: enable-sse3 ( -- ) - enable-sse2 - enable-sse3-simd ; - -enable-min/max - -:: install-sse-check ( version -- ) +:: install-sse2-check ( -- ) [ - sse-version version < [ - "This image was built to use " write - version sse-string write - " but your CPU only supports " write - sse-version sse-string write "." print + sse-version 20 < [ + "This image was built to use SSE2 but your CPU does not support it." print "You will need to bootstrap Factor again." print flush 1 exit ] when ] "cpu.x86" add-init-hook ; -: enable-sse ( version -- ) - { - { 00 [ ] } - { 10 [ ] } - { 20 [ enable-sse2 ] } - { 30 [ enable-sse3 ] } - { 33 [ enable-sse3 ] } - { 41 [ enable-sse3 ] } - { 42 [ enable-sse3 ] } - } case ; +: enable-sse2 ( version -- ) + 20 >= [ + enable-float-intrinsics + enable-fsqrt + enable-float-min/max + install-sse2-check + ] when ; + + +enable-simd +enable-min/max : check-sse ( -- ) [ { sse_version } compile ] with-optimizer - - "Checking for multimedia extensions: " write sse-version 30 min - [ sse-string write " detected" print ] - [ install-sse-check ] - [ enable-sse ] tri ; + "Checking for multimedia extensions: " write sse-version + [ sse-string write " detected" print ] [ enable-sse2 ] bi ; diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 2141914d1c..7d84b18225 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -41,9 +41,12 @@ MACRO: simd-boa ( rep class -- simd-array ) [ { { v+ (simd-v+) } - { v- (simd-v-) } + { vs+ (simd-vs+) } { v+- (simd-v+-) } + { v- (simd-v-) } + { vs- (simd-vs-) } { v* (simd-v*) } + { vs* (simd-vs*) } { v/ (simd-v/) } { vmin (simd-vmin) } { vmax (simd-vmax) } @@ -111,7 +114,7 @@ A{ DEFINES ${A}{ NTH [ T dup c-type-getter-boxer array-accessor ] SET-NTH [ T dup c-setter array-accessor ] -A-rep IS ${A}-rep +A-rep [ A name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op @@ -142,6 +145,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ; M: A byte-length underlying>> length ; inline +M: A element-type drop A-rep rep-component-type ; + M: A pprint-delims drop \ A{ \ } ; M: A >pprint-sequence ; @@ -172,9 +177,12 @@ INSTANCE: A sequence \ A \ A-with \ A-rep H{ { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } + { vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] } { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } + { vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] } { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } + { vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] } { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] } @@ -227,7 +235,7 @@ A{ DEFINES ${A}{ A-deref DEFINES-PRIVATE ${A}-deref -A-rep IS ${A/2}-rep +A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op @@ -267,6 +275,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ; M: A byte-length drop 32 ; inline +M: A element-type drop A-rep rep-component-type ; + SYNTAX: A{ \ } [ >A ] parse-literal ; M: A pprint-delims drop \ A{ \ } ; @@ -298,9 +308,12 @@ INSTANCE: A sequence \ A \ A-with \ A-rep H{ { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } + { vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] } { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } + { vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] } { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } + { vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] } { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] } diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 6d39b9e70a..63214f7da6 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -8,8 +8,11 @@ IN: math.vectors.simd.intrinsics ERROR: bad-simd-call ; : (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ; +: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ; +: (simd-vs+) ( v1 v2 rep -- v3 ) bad-simd-call ; +: (simd-vs-) ( v1 v2 rep -- v3 ) bad-simd-call ; +: (simd-vs*) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ; @@ -68,9 +71,12 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) M: vector-rep supported-simd-op? { { \ (simd-v+) [ %add-vector-reps ] } + { \ (simd-vs+) [ %saturated-add-vector-reps ] } { \ (simd-v+-) [ %add-sub-vector-reps ] } { \ (simd-v-) [ %sub-vector-reps ] } + { \ (simd-vs-) [ %saturated-sub-vector-reps ] } { \ (simd-v*) [ %mul-vector-reps ] } + { \ (simd-vs*) [ %saturated-mul-vector-reps ] } { \ (simd-v/) [ %div-vector-reps ] } { \ (simd-vmin) [ %min-vector-reps ] } { \ (simd-vmax) [ %max-vector-reps ] } diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index d108d70b26..ef625ffff0 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -161,8 +161,12 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives" $nl "It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "." { $subsection (simd-v+) } -{ $subsection (simd-v-) } +{ $subsection (simd-vs+) } { $subsection (simd-v+-) } +{ $subsection (simd-v-) } +{ $subsection (simd-vs-) } +{ $subsection (simd-v*) } +{ $subsection (simd-vs*) } { $subsection (simd-v/) } { $subsection (simd-vmin) } { $subsection (simd-vmax) } diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 1a85f5ade7..2fb36d428a 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -53,11 +53,14 @@ H{ { norm-sq { +vector+ -> +nonnegative+ } } { normalize { +vector+ -> +vector+ } } { v* { +vector+ +vector+ -> +vector+ } } + { vs* { +vector+ +vector+ -> +vector+ } } { v*n { +vector+ +scalar+ -> +vector+ } } { v+ { +vector+ +vector+ -> +vector+ } } + { vs+ { +vector+ +vector+ -> +vector+ } } { v+- { +vector+ +vector+ -> +vector+ } } { v+n { +vector+ +scalar+ -> +vector+ } } { v- { +vector+ +vector+ -> +vector+ } } + { vs- { +vector+ +vector+ -> +vector+ } } { v-n { +vector+ +scalar+ -> +vector+ } } { v. { +vector+ +vector+ -> +scalar+ } } { v/ { +vector+ +vector+ -> +vector+ } } diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index 4f2f093216..ce635b6d60 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -26,7 +26,11 @@ $nl { $subsection v. } { $subsection norm } { $subsection norm-sq } -{ $subsection normalize } ; +{ $subsection normalize } +"Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:" +{ $subsection vs+ } +{ $subsection vs- } +{ $subsection vs* } ; ABOUT: "math-vectors" @@ -100,6 +104,34 @@ HELP: v. { $snippet "0 [ conjugate * + ] 2reduce" } } ; +HELP: vs+ +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Adds " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } +{ $examples + "With saturation:" + { $example + "USING: math.vectors prettyprint specialized-arrays ;" + "SPECIALIZED-ARRAY: uchar" + "uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } vs+ ." + "uchar-array{ 170 255 220 }" + } + "Without saturation:" + { $example + "USING: math.vectors prettyprint specialized-arrays ;" + "SPECIALIZED-ARRAY: uchar" + "uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } v+ ." + "uchar-array{ 170 14 220 }" + } +} ; + +HELP: vs- +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise with saturation." } ; + +HELP: vs* +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Multiplies " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } ; + HELP: norm-sq { $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } } { $description "Computes the squared length of a mathematical vector." } ; @@ -120,3 +152,5 @@ HELP: set-axis { 2map v+ v- v* v/ } related-words { 2reduce v. } related-words + +{ vs+ vs- vs* } related-words diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index deda1dc505..3a1b2875a9 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -1,9 +1,11 @@ ! Copyright (C) 2005, 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: arrays kernel sequences math math.functions hints -math.order ; +USING: arrays alien.c-types kernel sequences math math.functions +hints math.order fry ; IN: math.vectors +GENERIC: element-type ( obj -- c-type ) + : vneg ( u -- v ) [ neg ] map ; : v+n ( u n -- v ) [ + ] curry map ; @@ -29,6 +31,13 @@ IN: math.vectors [ [ not ] 2dip pick [ + ] [ - ] if ] 2map nip ; +: 2saturate-map ( u v quot -- w ) + pick element-type '[ @ _ c-type-clamp ] 2map ; inline + +: vs+ ( u v -- w ) [ + ] 2saturate-map ; +: vs- ( u v -- w ) [ - ] 2saturate-map ; +: vs* ( u v -- w ) [ * ] 2saturate-map ; + : vfloor ( v -- _v_ ) [ floor ] map ; : vceiling ( v -- ^v^ ) [ ceiling ] map ; : vtruncate ( v -- -v- ) [ truncate ] map ; diff --git a/basis/specialized-arrays/specialized-arrays-tests.factor b/basis/specialized-arrays/specialized-arrays-tests.factor index 5d88f42d50..526312e0aa 100755 --- a/basis/specialized-arrays/specialized-arrays-tests.factor +++ b/basis/specialized-arrays/specialized-arrays-tests.factor @@ -4,7 +4,7 @@ specialized-arrays.private sequences alien.c-types accessors kernel arrays combinators compiler compiler.units classes.struct combinators.smart compiler.tree.debugger math libc destructors sequences.private multiline eval words vocabs namespaces -assocs prettyprint alien.data ; +assocs prettyprint alien.data math.vectors ; FROM: alien.c-types => float ; SPECIALIZED-ARRAY: int @@ -13,6 +13,9 @@ SPECIALIZED-ARRAY: ushort SPECIALIZED-ARRAY: char SPECIALIZED-ARRAY: uint SPECIALIZED-ARRAY: float +SPECIALIZED-ARRAY: ulonglong + +[ ulonglong ] [ ulonglong-array{ } element-type ] unit-test [ t ] [ { 1 2 3 } >int-array int-array? ] unit-test diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor index a64d052fd1..9692980858 100755 --- a/basis/specialized-arrays/specialized-arrays.factor +++ b/basis/specialized-arrays/specialized-arrays.factor @@ -1,10 +1,11 @@ ! Copyright (C) 2008, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien alien.c-types alien.data alien.parser assocs -byte-arrays classes compiler.units functors kernel lexer libc math -math.vectors.specialization namespaces parser prettyprint.custom -sequences sequences.private strings summary vocabs vocabs.loader -vocabs.parser vocabs.generated words fry combinators ; +USING: accessors alien alien.c-types alien.data alien.parser +assocs byte-arrays classes compiler.units functors kernel lexer +libc math math.vectors math.vectors.specialization namespaces +parser prettyprint.custom sequences sequences.private strings +summary vocabs vocabs.loader vocabs.parser vocabs.generated +words fry combinators present ; IN: specialized-arrays MIXIN: specialized-array @@ -53,14 +54,14 @@ TUPLE: A : ( alien len -- specialized-array ) A boa ; inline -: ( n -- specialized-array ) [ T ] keep ; inline +: ( n -- specialized-array ) [ \ T ] keep ; inline -: (A) ( n -- specialized-array ) [ T (underlying) ] keep ; inline +: (A) ( n -- specialized-array ) [ \ T (underlying) ] keep ; inline -: malloc-A ( len -- specialized-array ) [ T heap-size calloc ] keep ; inline +: malloc-A ( len -- specialized-array ) [ \ T heap-size calloc ] keep ; inline : byte-array>A ( byte-array -- specialized-array ) - dup length T heap-size /mod 0 = [ drop T bad-byte-array-length ] unless + dup length \ T heap-size /mod 0 = [ drop \ T bad-byte-array-length ] unless ; inline M: A clone [ underlying>> clone ] [ length>> ] bi ; inline @@ -81,12 +82,14 @@ M: A equal? over A instance? [ sequence= ] [ 2drop f ] if ; M: A resize [ - [ T heap-size * ] [ underlying>> ] bi* + [ \ T heap-size * ] [ underlying>> ] bi* resize-byte-array ] [ drop ] 2bi ; inline -M: A byte-length length T heap-size * ; inline +M: A byte-length length \ T heap-size * ; inline + +M: A element-type drop \ T ; inline M: A direct-array-syntax drop \ A@ ; @@ -116,15 +119,15 @@ M: word (underlying-type) "c-type" word-prop ; } cond ; : underlying-type-name ( c-type -- name ) - underlying-type dup word? [ name>> ] when ; + underlying-type present ; : specialized-array-vocab ( c-type -- vocab ) - "specialized-arrays.instances." prepend ; + present "specialized-arrays.instances." prepend ; PRIVATE> : define-array-vocab ( type -- vocab ) - underlying-type-name + underlying-type [ specialized-array-vocab ] [ '[ _ define-array ] ] bi generate-vocab ; diff --git a/vm/cpu-x86.S b/vm/cpu-x86.S index d229b2cb79..fe97fe2179 100644 --- a/vm/cpu-x86.S +++ b/vm/cpu-x86.S @@ -77,12 +77,12 @@ DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot)): DEF(bool,sse_version,(void)): mov $0x1,RETURN_REG cpuid - /* test $0x100000,%ecx + test $0x100000,%ecx jnz sse_42 test $0x80000,%ecx jnz sse_41 test $0x200,%ecx - jnz ssse_3 */ + jnz ssse_3 test $0x1,%ecx jnz sse_3 test $0x4000000,%edx