From e56cd5cc127b12380a0ef596cc611d01a0ee4da7 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 30 Sep 2009 19:04:02 -0500 Subject: [PATCH 01/19] accept f and t as elements of literal simd vectors, storing binary all-zeroes or all-ones --- .../math/vectors/simd/functor/functor.factor | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 5b72c544ae..6a7771c2c3 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors assocs byte-arrays classes effects fry +USING: accessors assocs byte-arrays classes classes.algebra effects fry functors generalizations kernel literals locals math math.functions math.vectors math.vectors.private math.vectors.simd.intrinsics math.vectors.specialization parser prettyprint.custom sequences @@ -11,6 +11,25 @@ IN: math.vectors.simd.functor ERROR: bad-length got expected ; +: vector-true-value ( class -- value ) + { + { [ dup integer class<= ] [ drop -1 ] } + { [ dup float class<= ] [ drop -1 bits>double ] } + } cond ; foldable + +: vector-false-value ( class -- value ) + { + { [ dup integer class<= ] [ drop 0 ] } + { [ dup float class<= ] [ drop 0.0 ] } + } cond ; foldable + +: boolean>element ( bool/elt class -- elt ) + swap { + { t [ vector-true-value ] } + { f [ vector-false-value ] } + [ nip ] + } case ; inline + MACRO: simd-boa ( rep class -- simd-array ) [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ; @@ -156,6 +175,8 @@ A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op A-v->v-op DEFINES-PRIVATE ${A}-v->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op +A-element-class [ A-rep rep-component-type c:c-type-boxed-class ] + WHERE TUPLE: A @@ -167,7 +188,9 @@ M: A length drop N ; inline M: A nth-unsafe underlying>> A-rep simd-nth ; inline -M: A set-nth-unsafe underlying>> SET-NTH call ; inline +M: A set-nth-unsafe + [ A-element-class boolean>element ] 2dip + underlying>> SET-NTH call ; inline : >A ( seq -- simd-array ) \ A new clone-like ; From 7db7b63552e0238860be710c8cbb3ec6525b6bb0 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 30 Sep 2009 23:03:59 -0500 Subject: [PATCH 02/19] add a %blend-vector intrinsic for v? --- .../cfg/instructions/instructions.factor | 6 ++++++ .../compiler/cfg/intrinsics/intrinsics.factor | 1 + .../compiler/cfg/intrinsics/simd/simd.factor | 7 +++++++ basis/compiler/codegen/codegen.factor | 1 + .../tree/propagation/simd/simd.factor | 1 + basis/cpu/architecture/architecture.factor | 2 ++ basis/cpu/ppc/ppc.factor | 1 + basis/cpu/x86/x86.factor | 11 ++++++++++ .../math/vectors/simd/functor/functor.factor | 15 ++++++++++++++ .../vectors/simd/intrinsics/intrinsics.factor | 2 ++ .../specialization/specialization.factor | 3 ++- basis/math/vectors/vectors.factor | 2 +- extra/math/matrices/simd/simd.factor | 20 +++++++++---------- 13 files changed, 60 insertions(+), 12 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index cf0f668db3..1dcfb4fd64 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -402,6 +402,12 @@ def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##blend-vector +def: dst +use: mask src1 src2 +temp: temp +literal: rep ; + PURE-INSN: ##shl-vector def: dst use: src1 src2/scalar-rep diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 76dace1f28..e97b5f090a 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -181,6 +181,7 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] } { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v?) [ emit-blend-vector ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] } } enable-intrinsics ; diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 51eced4e35..d41d001159 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -38,6 +38,9 @@ MACRO: if-literals-match ( quots -- ) : [binary] ( quot -- quot' ) '[ [ ds-drop 2inputs ] dip @ ds-push ] ; inline +: [ternary] ( quot -- quot' ) + '[ [ ds-drop 3inputs ] dip @ ds-push ] ; inline + : emit-binary-vector-op ( node quot -- ) [binary] emit-vector-op ; inline @@ -95,6 +98,10 @@ MACRO: if-literals-match ( quots -- ) [ ^^select-vector ] [unary/param] { [ integer? ] [ representation? ] } if-literals-match ; inline +: emit-blend-vector ( node -- ) + [ ^^blend-vector ] [ternary] + { [ representation? ] } if-literals-match ; inline + : emit-alien-vector ( node -- ) dup [ '[ diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index b0307f685d..b07c29e231 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -188,6 +188,7 @@ CODEGEN: ##and-vector %and-vector CODEGEN: ##andn-vector %andn-vector CODEGEN: ##or-vector %or-vector CODEGEN: ##xor-vector %xor-vector +CODEGEN: ##blend-vector %blend-vector CODEGEN: ##shl-vector %shl-vector CODEGEN: ##shr-vector %shr-vector CODEGEN: ##integer>scalar %integer>scalar diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index e2c2b15f2d..805f5ec158 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -28,6 +28,7 @@ IN: compiler.tree.propagation.simd (simd-with) (simd-gather-2) (simd-gather-4) + (simd-v?) alien-vector } [ { byte-array } "default-output-classes" set-word-prop ] each diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 3b1f57d08e..9e2e1f5ac3 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -242,6 +242,7 @@ HOOK: %and-vector cpu ( dst src1 src2 rep -- ) HOOK: %andn-vector cpu ( dst src1 src2 rep -- ) HOOK: %or-vector cpu ( dst src1 src2 rep -- ) HOOK: %xor-vector cpu ( dst src1 src2 rep -- ) +HOOK: %blend-vector cpu ( dst mask src1 src2 temp rep -- ) HOOK: %shl-vector cpu ( dst src1 src2 rep -- ) HOOK: %shr-vector cpu ( dst src1 src2 rep -- ) HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- ) @@ -275,6 +276,7 @@ HOOK: %and-vector-reps cpu ( -- reps ) HOOK: %andn-vector-reps cpu ( -- reps ) HOOK: %or-vector-reps cpu ( -- reps ) HOOK: %xor-vector-reps cpu ( -- reps ) +HOOK: %blend-vector-reps cpu ( -- reps ) HOOK: %shl-vector-reps cpu ( -- reps ) HOOK: %shr-vector-reps cpu ( -- reps ) HOOK: %horizontal-shl-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index 006d38f384..8503ac83ea 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -286,6 +286,7 @@ M: ppc %and-vector-reps { } ; M: ppc %andn-vector-reps { } ; M: ppc %or-vector-reps { } ; M: ppc %xor-vector-reps { } ; +M: ppc %blend-vector-reps { } ; M: ppc %shl-vector-reps { } ; M: ppc %shr-vector-reps { } ; M: ppc %horizontal-shl-vector-reps { } ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index eaaab19662..414ba2b6de 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1011,6 +1011,17 @@ M: x86 %xor-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; +M:: x86 %blend-vector ( dst mask src1 src2 temp rep -- ) + temp src1 mask rep %and-vector + dst mask src2 rep %andn-vector + dst dst temp rep %or-vector ; + +M: x86 %blend-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + } available-reps ; + M: x86 %shl-vector ( dst src1 src2 rep -- ) [ two-operand ] keep { diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 6a7771c2c3..aea415a27c 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -30,6 +30,9 @@ ERROR: bad-length got expected ; [ nip ] } case ; inline +: element>boolean ( elt class -- bool ) + vector-false-value = not ; inline + MACRO: simd-boa ( rep class -- simd-array ) [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ; @@ -169,6 +172,7 @@ A{ DEFINES ${A}{ SET-NTH [ T dup c:c-setter c:array-accessor ] A-rep [ A name>> "-rep" append "cpu.architecture" lookup ] +A-vvv->v-op DEFINES-PRIVATE ${A}-vvv->v-op A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op @@ -235,6 +239,9 @@ INSTANCE: A sequence v-op ( v1 v2 v3 quot -- v4 ) + [ [ underlying>> ] tri@ A-rep ] dip call \ A boa ; inline + : A-vv->v-op ( v1 v2 quot -- v3 ) [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline @@ -255,6 +262,7 @@ simd new \ A-with >>ctor \ A-rep >>rep { + { { +vector+ +vector+ +vector+ -> +vector+ } A-vv->v-op } { { +vector+ +vector+ -> +vector+ } A-vv->v-op } { { +vector+ +scalar+ -> +vector+ } A-vn->v-op } { { +vector+ +literal+ -> +vector+ } A-vn->v-op } @@ -316,6 +324,7 @@ A{ DEFINES ${A}{ A-deref DEFINES-PRIVATE ${A}-deref A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ] +A-vvv->v-op DEFINES-PRIVATE ${A}-vvv->v-op A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op @@ -383,6 +392,11 @@ M: A pprint* pprint-object ; INSTANCE: A sequence +: A-vvv->v-op ( v1 v2 v3 quot -- v4 ) + [ [ [ underlying1>> ] tri@ A-rep ] dip call ] + [ [ [ underlying2>> ] tri@ A-rep ] dip call ] 3bi + \ A boa ; inline + : A-vv->v-op ( v1 v2 quot -- v3 ) [ [ [ underlying1>> ] bi@ A-rep ] dip call ] [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi @@ -411,6 +425,7 @@ simd new \ A-with >>ctor \ A-rep >>rep { + { { +vector+ +vector+ +vector+ -> +vector+ } A-vvv->v-op } { { +vector+ +vector+ -> +vector+ } A-vv->v-op } { { +vector+ +scalar+ -> +vector+ } A-vn->v-op } { { +vector+ +literal+ -> +vector+ } A-vn->v-op } diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 6008a20844..2f6d61bd53 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -49,6 +49,7 @@ SIMD-OP: vrshift SIMD-OP: hlshift SIMD-OP: hrshift SIMD-OP: vshuffle +SIMD-OP: v? : (simd-with) ( x rep -- v ) bad-simd-call ; : (simd-gather-2) ( a b rep -- v ) bad-simd-call ; @@ -121,6 +122,7 @@ M: vector-rep supported-simd-op? { \ (simd-vbitandn) [ %andn-vector-reps ] } { \ (simd-vbitor) [ %or-vector-reps ] } { \ (simd-vbitxor) [ %xor-vector-reps ] } + { \ (simd-v?) [ %blend-vector-reps ] } { \ (simd-vlshift) [ %shl-vector-reps ] } { \ (simd-vrshift) [ %shr-vector-reps ] } { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index ea9947a0c5..21d024a50e 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -92,6 +92,7 @@ H{ { hrshift { +vector+ +literal+ -> +vector+ } } { vshuffle { +vector+ +literal+ -> +vector+ } } { vbroadcast { +vector+ +literal+ -> +vector+ } } + { v? { +vector+ +vector+ +vector+ -> +vector+ } } } PREDICATE: vector-word < word vector-words key? ; @@ -159,4 +160,4 @@ ERROR: bad-vector-word word ; vector-words keys [ [ vector-word-custom-inlining ] "custom-inlining" set-word-prop -] each \ No newline at end of file +] each diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index a3d51752bd..dee849cb7a 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -101,7 +101,7 @@ PRIVATE> : v? ( ? true false -- w ) [ ? ] pick 3map-as ; -: vmask ( u ? -- u' ) swap dup dup vbitxor v? ; +: vmask ( u ? -- u' ) vbitand ; inline : vfloor ( u -- v ) [ floor ] map ; : vceiling ( u -- v ) [ ceiling ] map ; diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor index 0c4c3e1866..bc213fec3a 100644 --- a/extra/math/matrices/simd/simd.factor +++ b/extra/math/matrices/simd/simd.factor @@ -121,7 +121,7 @@ TYPED:: m4^n ( m: matrix4 n: fixnum -- m^n: matrix4 ) TYPED:: scale-matrix4 ( factors: float-4 -- matrix: matrix4 ) matrix4 (struct) :> c - factors { t t t f } vmask :> factors' + factors float-4{ t t t f } vmask :> factors' factors' { 0 3 3 3 } vshuffle factors' { 3 1 3 3 } vshuffle @@ -137,11 +137,11 @@ TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 ) matrix4 (struct) :> c float-4{ 0.0 0.0 0.0 1.0 } :> c4 - { t t t f } offset c4 v? :> offset' + float-4{ t t t f } offset c4 v? :> offset' - offset' { 3 3 3 0 } vshuffle { t f f t } vmask - offset' { 3 3 3 1 } vshuffle { f t f t } vmask - offset' { 3 3 3 2 } vshuffle { f f t t } vmask + offset' { 3 3 3 0 } vshuffle float-4{ t f f t } vmask + offset' { 3 3 3 1 } vshuffle float-4{ f t f t } vmask + offset' { 3 3 3 2 } vshuffle float-4{ f f t t } vmask c4 c set-rows ; @@ -166,7 +166,7 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 ) axis2 cc ones axis2 v- v* v+ :> diagonal axis { 0 0 1 3 } vshuffle axis { 1 2 2 3 } vshuffle v* 1-c v* - { t t t f } vmask :> triangle-a + float-4{ t t t f } vmask :> triangle-a ss { 2 1 0 3 } vshuffle triangle-sign v* :> triangle-b triangle-a triangle-b v+ :> triangle-lo triangle-a triangle-b v- :> triangle-hi @@ -186,12 +186,12 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4 matrix4 (struct) :> c near near near far + 2 near far * * float-4-boa :> num - { t t f f } xy near far - float-4-with v? :> denom + float-4{ t t f f } xy near far - float-4-with v? :> denom num denom v/ :> fov - fov { 0 0 0 0 } vshuffle { t f f f } vmask - fov { 1 1 1 1 } vshuffle { f t f f } vmask - fov { 2 2 2 3 } vshuffle { f f t t } vmask + fov { 0 0 0 0 } vshuffle float-4{ t f f f } vmask + fov { 1 1 1 1 } vshuffle float-4{ f t f f } vmask + fov { 2 2 2 3 } vshuffle float-4{ f f t t } vmask float-4{ 0.0 0.0 -1.0 0.0 } c set-rows ; From a93f8f66f954fd2c1c1362b3069483a30d09d001 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 30 Sep 2009 23:40:37 -0500 Subject: [PATCH 03/19] Revert "add a %blend-vector intrinsic for v?" This reverts commit 21e4b28b67002b916d41108c23756e48ad5ca433. --- .../cfg/instructions/instructions.factor | 6 ------ .../compiler/cfg/intrinsics/intrinsics.factor | 1 - .../compiler/cfg/intrinsics/simd/simd.factor | 7 ------- basis/compiler/codegen/codegen.factor | 1 - .../tree/propagation/simd/simd.factor | 1 - basis/cpu/architecture/architecture.factor | 2 -- basis/cpu/ppc/ppc.factor | 1 - basis/cpu/x86/x86.factor | 11 ---------- .../math/vectors/simd/functor/functor.factor | 15 -------------- .../vectors/simd/intrinsics/intrinsics.factor | 2 -- .../specialization/specialization.factor | 3 +-- basis/math/vectors/vectors.factor | 2 +- extra/math/matrices/simd/simd.factor | 20 +++++++++---------- 13 files changed, 12 insertions(+), 60 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 97a5cd13c6..aefa155ec5 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -402,12 +402,6 @@ def: dst use: src1 src2 literal: rep ; -PURE-INSN: ##blend-vector -def: dst -use: mask src1 src2 -temp: temp -literal: rep ; - PURE-INSN: ##shl-vector def: dst use: src1 src2/int-scalar-rep diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index e97b5f090a..76dace1f28 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -181,7 +181,6 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] } { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v?) [ emit-blend-vector ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] } } enable-intrinsics ; diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 7604b731d7..62ee1cf019 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -38,9 +38,6 @@ MACRO: if-literals-match ( quots -- ) : [binary] ( quot -- quot' ) '[ [ ds-drop 2inputs ] dip @ ds-push ] ; inline -: [ternary] ( quot -- quot' ) - '[ [ ds-drop 3inputs ] dip @ ds-push ] ; inline - : emit-binary-vector-op ( node quot -- ) [binary] emit-vector-op ; inline @@ -100,10 +97,6 @@ MACRO: if-literals-match ( quots -- ) [ ^^select-vector ] [unary/param] { [ integer? ] [ representation? ] } if-literals-match ; inline -: emit-blend-vector ( node -- ) - [ ^^blend-vector ] [ternary] - { [ representation? ] } if-literals-match ; inline - : emit-alien-vector ( node -- ) dup [ '[ diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index b07c29e231..b0307f685d 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -188,7 +188,6 @@ CODEGEN: ##and-vector %and-vector CODEGEN: ##andn-vector %andn-vector CODEGEN: ##or-vector %or-vector CODEGEN: ##xor-vector %xor-vector -CODEGEN: ##blend-vector %blend-vector CODEGEN: ##shl-vector %shl-vector CODEGEN: ##shr-vector %shr-vector CODEGEN: ##integer>scalar %integer>scalar diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 805f5ec158..e2c2b15f2d 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -28,7 +28,6 @@ IN: compiler.tree.propagation.simd (simd-with) (simd-gather-2) (simd-gather-4) - (simd-v?) alien-vector } [ { byte-array } "default-output-classes" set-word-prop ] each diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 9e2e1f5ac3..3b1f57d08e 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -242,7 +242,6 @@ HOOK: %and-vector cpu ( dst src1 src2 rep -- ) HOOK: %andn-vector cpu ( dst src1 src2 rep -- ) HOOK: %or-vector cpu ( dst src1 src2 rep -- ) HOOK: %xor-vector cpu ( dst src1 src2 rep -- ) -HOOK: %blend-vector cpu ( dst mask src1 src2 temp rep -- ) HOOK: %shl-vector cpu ( dst src1 src2 rep -- ) HOOK: %shr-vector cpu ( dst src1 src2 rep -- ) HOOK: %horizontal-shl-vector cpu ( dst src1 src2 rep -- ) @@ -276,7 +275,6 @@ HOOK: %and-vector-reps cpu ( -- reps ) HOOK: %andn-vector-reps cpu ( -- reps ) HOOK: %or-vector-reps cpu ( -- reps ) HOOK: %xor-vector-reps cpu ( -- reps ) -HOOK: %blend-vector-reps cpu ( -- reps ) HOOK: %shl-vector-reps cpu ( -- reps ) HOOK: %shr-vector-reps cpu ( -- reps ) HOOK: %horizontal-shl-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index f290e9cc3b..de37cd6ee3 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -286,7 +286,6 @@ M: ppc %and-vector-reps { } ; M: ppc %andn-vector-reps { } ; M: ppc %or-vector-reps { } ; M: ppc %xor-vector-reps { } ; -M: ppc %blend-vector-reps { } ; M: ppc %shl-vector-reps { } ; M: ppc %shr-vector-reps { } ; M: ppc %horizontal-shl-vector-reps { } ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 14b9f275e5..3c20064313 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1011,17 +1011,6 @@ M: x86 %xor-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; -M:: x86 %blend-vector ( dst mask src1 src2 temp rep -- ) - temp src1 mask rep %and-vector - dst mask src2 rep %andn-vector - dst dst temp rep %or-vector ; - -M: x86 %blend-vector-reps - { - { sse? { float-4-rep } } - { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } - } available-reps ; - M: x86 %shl-vector ( dst src1 src2 rep -- ) [ two-operand ] keep { diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index aea415a27c..6a7771c2c3 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -30,9 +30,6 @@ ERROR: bad-length got expected ; [ nip ] } case ; inline -: element>boolean ( elt class -- bool ) - vector-false-value = not ; inline - MACRO: simd-boa ( rep class -- simd-array ) [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ; @@ -172,7 +169,6 @@ A{ DEFINES ${A}{ SET-NTH [ T dup c:c-setter c:array-accessor ] A-rep [ A name>> "-rep" append "cpu.architecture" lookup ] -A-vvv->v-op DEFINES-PRIVATE ${A}-vvv->v-op A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op @@ -239,9 +235,6 @@ INSTANCE: A sequence v-op ( v1 v2 v3 quot -- v4 ) - [ [ underlying>> ] tri@ A-rep ] dip call \ A boa ; inline - : A-vv->v-op ( v1 v2 quot -- v3 ) [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline @@ -262,7 +255,6 @@ simd new \ A-with >>ctor \ A-rep >>rep { - { { +vector+ +vector+ +vector+ -> +vector+ } A-vv->v-op } { { +vector+ +vector+ -> +vector+ } A-vv->v-op } { { +vector+ +scalar+ -> +vector+ } A-vn->v-op } { { +vector+ +literal+ -> +vector+ } A-vn->v-op } @@ -324,7 +316,6 @@ A{ DEFINES ${A}{ A-deref DEFINES-PRIVATE ${A}-deref A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ] -A-vvv->v-op DEFINES-PRIVATE ${A}-vvv->v-op A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op @@ -392,11 +383,6 @@ M: A pprint* pprint-object ; INSTANCE: A sequence -: A-vvv->v-op ( v1 v2 v3 quot -- v4 ) - [ [ [ underlying1>> ] tri@ A-rep ] dip call ] - [ [ [ underlying2>> ] tri@ A-rep ] dip call ] 3bi - \ A boa ; inline - : A-vv->v-op ( v1 v2 quot -- v3 ) [ [ [ underlying1>> ] bi@ A-rep ] dip call ] [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi @@ -425,7 +411,6 @@ simd new \ A-with >>ctor \ A-rep >>rep { - { { +vector+ +vector+ +vector+ -> +vector+ } A-vvv->v-op } { { +vector+ +vector+ -> +vector+ } A-vv->v-op } { { +vector+ +scalar+ -> +vector+ } A-vn->v-op } { { +vector+ +literal+ -> +vector+ } A-vn->v-op } diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 2f6d61bd53..6008a20844 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -49,7 +49,6 @@ SIMD-OP: vrshift SIMD-OP: hlshift SIMD-OP: hrshift SIMD-OP: vshuffle -SIMD-OP: v? : (simd-with) ( x rep -- v ) bad-simd-call ; : (simd-gather-2) ( a b rep -- v ) bad-simd-call ; @@ -122,7 +121,6 @@ M: vector-rep supported-simd-op? { \ (simd-vbitandn) [ %andn-vector-reps ] } { \ (simd-vbitor) [ %or-vector-reps ] } { \ (simd-vbitxor) [ %xor-vector-reps ] } - { \ (simd-v?) [ %blend-vector-reps ] } { \ (simd-vlshift) [ %shl-vector-reps ] } { \ (simd-vrshift) [ %shr-vector-reps ] } { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 5d2c6b7a5a..b07615bfc9 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -92,7 +92,6 @@ H{ { hrshift { +vector+ +literal+ -> +vector+ } } { vshuffle { +vector+ +literal+ -> +vector+ } } { vbroadcast { +vector+ +literal+ -> +vector+ } } - { v? { +vector+ +vector+ +vector+ -> +vector+ } } } PREDICATE: vector-word < word vector-words key? ; @@ -163,4 +162,4 @@ ERROR: bad-vector-word word ; vector-words keys [ [ vector-word-custom-inlining ] "custom-inlining" set-word-prop -] each +] each \ No newline at end of file diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index dee849cb7a..a3d51752bd 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -101,7 +101,7 @@ PRIVATE> : v? ( ? true false -- w ) [ ? ] pick 3map-as ; -: vmask ( u ? -- u' ) vbitand ; inline +: vmask ( u ? -- u' ) swap dup dup vbitxor v? ; : vfloor ( u -- v ) [ floor ] map ; : vceiling ( u -- v ) [ ceiling ] map ; diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor index bc213fec3a..0c4c3e1866 100644 --- a/extra/math/matrices/simd/simd.factor +++ b/extra/math/matrices/simd/simd.factor @@ -121,7 +121,7 @@ TYPED:: m4^n ( m: matrix4 n: fixnum -- m^n: matrix4 ) TYPED:: scale-matrix4 ( factors: float-4 -- matrix: matrix4 ) matrix4 (struct) :> c - factors float-4{ t t t f } vmask :> factors' + factors { t t t f } vmask :> factors' factors' { 0 3 3 3 } vshuffle factors' { 3 1 3 3 } vshuffle @@ -137,11 +137,11 @@ TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 ) matrix4 (struct) :> c float-4{ 0.0 0.0 0.0 1.0 } :> c4 - float-4{ t t t f } offset c4 v? :> offset' + { t t t f } offset c4 v? :> offset' - offset' { 3 3 3 0 } vshuffle float-4{ t f f t } vmask - offset' { 3 3 3 1 } vshuffle float-4{ f t f t } vmask - offset' { 3 3 3 2 } vshuffle float-4{ f f t t } vmask + offset' { 3 3 3 0 } vshuffle { t f f t } vmask + offset' { 3 3 3 1 } vshuffle { f t f t } vmask + offset' { 3 3 3 2 } vshuffle { f f t t } vmask c4 c set-rows ; @@ -166,7 +166,7 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 ) axis2 cc ones axis2 v- v* v+ :> diagonal axis { 0 0 1 3 } vshuffle axis { 1 2 2 3 } vshuffle v* 1-c v* - float-4{ t t t f } vmask :> triangle-a + { t t t f } vmask :> triangle-a ss { 2 1 0 3 } vshuffle triangle-sign v* :> triangle-b triangle-a triangle-b v+ :> triangle-lo triangle-a triangle-b v- :> triangle-hi @@ -186,12 +186,12 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4 matrix4 (struct) :> c near near near far + 2 near far * * float-4-boa :> num - float-4{ t t f f } xy near far - float-4-with v? :> denom + { t t f f } xy near far - float-4-with v? :> denom num denom v/ :> fov - fov { 0 0 0 0 } vshuffle float-4{ t f f f } vmask - fov { 1 1 1 1 } vshuffle float-4{ f t f f } vmask - fov { 2 2 2 3 } vshuffle float-4{ f f t t } vmask + fov { 0 0 0 0 } vshuffle { t f f f } vmask + fov { 1 1 1 1 } vshuffle { f t f f } vmask + fov { 2 2 2 3 } vshuffle { f f t t } vmask float-4{ 0.0 0.0 -1.0 0.0 } c set-rows ; From 5ac5a74cc6dc08260413a8c1a8d7b1b92dca0de1 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 00:09:25 -0500 Subject: [PATCH 04/19] write v? and vmask in terms of bitwise ops --- basis/math/vectors/vectors-docs.factor | 4 ++-- basis/math/vectors/vectors.factor | 5 +++-- extra/math/matrices/simd/simd.factor | 20 ++++++++++---------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index 1d323822bd..9b6f0a04f1 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -343,8 +343,8 @@ HELP: vmask { $description "Returns a copy of " { $snippet "u" } " with the elements for which the corresponding element of " { $snippet "?" } " is false replaced by zero." } ; HELP: v? -{ $values { "?" "a sequence of booleans" } { "true" "a sequence of numbers" } { "false" "a sequence of numbers" } { "w" "a sequence of numbers" } } -{ $description "Creates a new sequence by selecting elements from the " { $snippet "true" } " and " { $snippet "false" } " sequences based on whether the corresponding element of the " { $snippet "?" } " sequence is true or false." } ; +{ $values { "mask" "a sequence of booleans" } { "true" "a sequence of numbers" } { "false" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Creates a new sequence by selecting elements from the " { $snippet "true" } " and " { $snippet "false" } " sequences based on whether the corresponding bits of the " { $snippet "mask" } " sequence are set or not." } ; { 2map v+ v- v* v/ } related-words diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index a3d51752bd..ffb761c543 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -99,9 +99,10 @@ PRIVATE> : vunordered? ( u v -- w ) [ unordered? ] { } 2map-as ; : v= ( u v -- w ) [ = ] { } 2map-as ; -: v? ( ? true false -- w ) [ ? ] pick 3map-as ; +: v? ( mask true false -- w ) + [ vbitand ] [ vbitandn ] bi-curry* bi vbitor ; inline -: vmask ( u ? -- u' ) swap dup dup vbitxor v? ; +: vmask ( u ? -- u' ) vbitand ; inline : vfloor ( u -- v ) [ floor ] map ; : vceiling ( u -- v ) [ ceiling ] map ; diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor index 0c4c3e1866..bc213fec3a 100644 --- a/extra/math/matrices/simd/simd.factor +++ b/extra/math/matrices/simd/simd.factor @@ -121,7 +121,7 @@ TYPED:: m4^n ( m: matrix4 n: fixnum -- m^n: matrix4 ) TYPED:: scale-matrix4 ( factors: float-4 -- matrix: matrix4 ) matrix4 (struct) :> c - factors { t t t f } vmask :> factors' + factors float-4{ t t t f } vmask :> factors' factors' { 0 3 3 3 } vshuffle factors' { 3 1 3 3 } vshuffle @@ -137,11 +137,11 @@ TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 ) matrix4 (struct) :> c float-4{ 0.0 0.0 0.0 1.0 } :> c4 - { t t t f } offset c4 v? :> offset' + float-4{ t t t f } offset c4 v? :> offset' - offset' { 3 3 3 0 } vshuffle { t f f t } vmask - offset' { 3 3 3 1 } vshuffle { f t f t } vmask - offset' { 3 3 3 2 } vshuffle { f f t t } vmask + offset' { 3 3 3 0 } vshuffle float-4{ t f f t } vmask + offset' { 3 3 3 1 } vshuffle float-4{ f t f t } vmask + offset' { 3 3 3 2 } vshuffle float-4{ f f t t } vmask c4 c set-rows ; @@ -166,7 +166,7 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 ) axis2 cc ones axis2 v- v* v+ :> diagonal axis { 0 0 1 3 } vshuffle axis { 1 2 2 3 } vshuffle v* 1-c v* - { t t t f } vmask :> triangle-a + float-4{ t t t f } vmask :> triangle-a ss { 2 1 0 3 } vshuffle triangle-sign v* :> triangle-b triangle-a triangle-b v+ :> triangle-lo triangle-a triangle-b v- :> triangle-hi @@ -186,12 +186,12 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4 matrix4 (struct) :> c near near near far + 2 near far * * float-4-boa :> num - { t t f f } xy near far - float-4-with v? :> denom + float-4{ t t f f } xy near far - float-4-with v? :> denom num denom v/ :> fov - fov { 0 0 0 0 } vshuffle { t f f f } vmask - fov { 1 1 1 1 } vshuffle { f t f f } vmask - fov { 2 2 2 3 } vshuffle { f f t t } vmask + fov { 0 0 0 0 } vshuffle float-4{ t f f f } vmask + fov { 1 1 1 1 } vshuffle float-4{ f t f f } vmask + fov { 2 2 2 3 } vshuffle float-4{ f f t t } vmask float-4{ 0.0 0.0 -1.0 0.0 } c set-rows ; From 0044964e783c506e1f5277b54a000fe24e5ebee4 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 13:04:59 -0500 Subject: [PATCH 05/19] fix a stupid bug in simd rotation-matrix4 --- extra/math/matrices/simd/simd-tests.factor | 13 +++++++++++++ extra/math/matrices/simd/simd.factor | 21 ++++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/extra/math/matrices/simd/simd-tests.factor b/extra/math/matrices/simd/simd-tests.factor index 5bd61adefd..60b37f5371 100644 --- a/extra/math/matrices/simd/simd-tests.factor +++ b/extra/math/matrices/simd/simd-tests.factor @@ -53,6 +53,19 @@ IN: math.matrices.simd.tests 1.0e-7 m~ ] unit-test +[ t ] [ + float-4{ 0.0 1.0 0.0 1.0 } pi 1/2. * rotation-matrix4 + S{ matrix4 f + float-4-array{ + float-4{ 0.0 0.0 1.0 0.0 } + float-4{ 0.0 1.0 0.0 0.0 } + float-4{ -1.0 0.0 0.0 0.0 } + float-4{ 0.0 0.0 0.0 1.0 } + } + } + 1.0e-7 m~ +] unit-test + [ S{ matrix4 f float-4-array{ diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor index bc213fec3a..fc6c2a03f8 100644 --- a/extra/math/matrices/simd/simd.factor +++ b/extra/math/matrices/simd/simd.factor @@ -95,6 +95,17 @@ TYPED:: m4. ( a: matrix4 b: matrix4 -- c: matrix4 ) c set-rows ; +TYPED:: v.m4 ( a: float-4 b: matrix4 -- c: float-4 ) + b rows :> b4 :> b3 :> b2 :> b1 + + a first b1 n*v + a second b2 n*v v+ + a third b3 n*v v+ + a fourth b4 n*v v+ ; + +TYPED:: m4.v ( a: matrix4 b: float-4 -- c: float-4 ) + a rows [ b v. ] 4 napply float-4-boa ; + CONSTANT: identity-matrix4 S{ matrix4 f float-4-array{ @@ -165,17 +176,17 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 ) axis2 cc ones axis2 v- v* v+ :> diagonal - axis { 0 0 1 3 } vshuffle axis { 1 2 2 3 } vshuffle v* 1-c v* + axis { 1 0 0 3 } vshuffle axis { 2 2 1 3 } vshuffle v* 1-c v* float-4{ t t t f } vmask :> triangle-a - ss { 2 1 0 3 } vshuffle triangle-sign v* :> triangle-b + ss axis v* triangle-sign v* :> triangle-b triangle-a triangle-b v+ :> triangle-lo triangle-a triangle-b v- :> triangle-hi diagonal scale-matrix4 :> diagonal-m - triangle-hi { 3 0 1 3 } vshuffle - triangle-hi { 3 3 2 3 } vshuffle triangle-lo { 0 3 3 3 } vshuffle v+ - triangle-lo { 1 2 3 3 } vshuffle + triangle-hi { 3 2 1 3 } vshuffle + triangle-hi { 3 3 0 3 } vshuffle triangle-lo { 2 3 3 3 } vshuffle v+ + triangle-lo { 1 0 3 3 } vshuffle float-4 new triangle-m set-rows drop From 0c8a4717f2bc8bb923effb6c1cf6f54045124c60 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 13:21:10 -0500 Subject: [PATCH 06/19] add software vall?, vany?, vnone? words --- basis/math/vectors/vectors-docs.factor | 23 +++++++++++++++++------ basis/math/vectors/vectors.factor | 6 ++++-- extra/math/matrices/simd/simd.factor | 16 ++++++++-------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index 9b6f0a04f1..547021afdb 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -61,8 +61,11 @@ ARTICLE: "math-vectors-logic" "Vector componentwise logic" { $subsection vand } { $subsection vor } { $subsection vxor } -{ $subsection vmask } { $subsection v? } +"Entire vector tests:" +{ $subsection vall? } +{ $subsection vany? } +{ $subsection vnone? } "Element shuffling:" { $subsection vshuffle } ; @@ -338,20 +341,28 @@ HELP: vnot { $values { "u" "a sequence of booleans" } { "w" "a sequence of booleans" } } { $description "Takes the logical NOT of each element of " { $snippet "u" } "." } ; -HELP: vmask -{ $values { "u" "a sequence of numbers" } { "?" "a sequence of booleans" } { "u'" "a sequence of numbers" } } -{ $description "Returns a copy of " { $snippet "u" } " with the elements for which the corresponding element of " { $snippet "?" } " is false replaced by zero." } ; - HELP: v? { $values { "mask" "a sequence of booleans" } { "true" "a sequence of numbers" } { "false" "a sequence of numbers" } { "w" "a sequence of numbers" } } { $description "Creates a new sequence by selecting elements from the " { $snippet "true" } " and " { $snippet "false" } " sequences based on whether the corresponding bits of the " { $snippet "mask" } " sequence are set or not." } ; +HELP: vany? +{ $values { "v" "a sequence of booleans" } { "?" "a boolean" } } +{ $description "Returns true if any element of " { $snippet "v" } " is true." } ; + +HELP: vall? +{ $values { "v" "a sequence of booleans" } { "?" "a boolean" } } +{ $description "Returns true if every element of " { $snippet "v" } " is true." } ; + +HELP: vnone? +{ $values { "v" "a sequence of booleans" } { "?" "a boolean" } } +{ $description "Returns true if every element of " { $snippet "v" } " is false." } ; + { 2map v+ v- v* v/ } related-words { 2reduce v. } related-words { vs+ vs- vs* } related-words -{ v< v<= v= v> v>= vunordered? vand vor vxor vnot vmask v? } related-words +{ v< v<= v= v> v>= vunordered? vand vor vxor vnot vany? vall? vnone? v? } related-words { vbitand vbitandn vbitor vbitxor vbitnot } related-words diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index ffb761c543..c2bfb3f295 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -92,6 +92,10 @@ PRIVATE> : vxor ( u v -- w ) [ xor ] 2map ; : vnot ( u -- w ) [ not ] map ; +: vall? ( v -- ? ) [ ] all? ; +: vany? ( v -- ? ) [ ] any? ; +: vnone? ( v -- ? ) [ not ] all? ; + : v< ( u v -- w ) [ < ] { } 2map-as ; : v<= ( u v -- w ) [ <= ] { } 2map-as ; : v>= ( u v -- w ) [ >= ] { } 2map-as ; @@ -102,8 +106,6 @@ PRIVATE> : v? ( mask true false -- w ) [ vbitand ] [ vbitandn ] bi-curry* bi vbitor ; inline -: vmask ( u ? -- u' ) vbitand ; inline - : vfloor ( u -- v ) [ floor ] map ; : vceiling ( u -- v ) [ ceiling ] map ; : vtruncate ( u -- v ) [ truncate ] map ; diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor index fc6c2a03f8..16960993b6 100644 --- a/extra/math/matrices/simd/simd.factor +++ b/extra/math/matrices/simd/simd.factor @@ -132,7 +132,7 @@ TYPED:: m4^n ( m: matrix4 n: fixnum -- m^n: matrix4 ) TYPED:: scale-matrix4 ( factors: float-4 -- matrix: matrix4 ) matrix4 (struct) :> c - factors float-4{ t t t f } vmask :> factors' + factors float-4{ t t t f } vbitand :> factors' factors' { 0 3 3 3 } vshuffle factors' { 3 1 3 3 } vshuffle @@ -150,9 +150,9 @@ TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 ) float-4{ 0.0 0.0 0.0 1.0 } :> c4 float-4{ t t t f } offset c4 v? :> offset' - offset' { 3 3 3 0 } vshuffle float-4{ t f f t } vmask - offset' { 3 3 3 1 } vshuffle float-4{ f t f t } vmask - offset' { 3 3 3 2 } vshuffle float-4{ f f t t } vmask + offset' { 3 3 3 0 } vshuffle float-4{ t f f t } vbitand + offset' { 3 3 3 1 } vshuffle float-4{ f t f t } vbitand + offset' { 3 3 3 2 } vshuffle float-4{ f f t t } vbitand c4 c set-rows ; @@ -177,7 +177,7 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 ) axis2 cc ones axis2 v- v* v+ :> diagonal axis { 1 0 0 3 } vshuffle axis { 2 2 1 3 } vshuffle v* 1-c v* - float-4{ t t t f } vmask :> triangle-a + float-4{ t t t f } vbitand :> triangle-a ss axis v* triangle-sign v* :> triangle-b triangle-a triangle-b v+ :> triangle-lo triangle-a triangle-b v- :> triangle-hi @@ -200,9 +200,9 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4 float-4{ t t f f } xy near far - float-4-with v? :> denom num denom v/ :> fov - fov { 0 0 0 0 } vshuffle float-4{ t f f f } vmask - fov { 1 1 1 1 } vshuffle float-4{ f t f f } vmask - fov { 2 2 2 3 } vshuffle float-4{ f f t t } vmask + fov { 0 0 0 0 } vshuffle float-4{ t f f f } vbitand + fov { 1 1 1 1 } vshuffle float-4{ f t f f } vbitand + fov { 2 2 2 3 } vshuffle float-4{ f f t t } vbitand float-4{ 0.0 0.0 -1.0 0.0 } c set-rows ; From 987ced40701af6593035993c7f6a9efc98f589d5 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 14:31:37 -0500 Subject: [PATCH 07/19] %compare-vector instruction (only does v= for now) --- .../cfg/instructions/instructions.factor | 5 +++++ .../compiler/cfg/intrinsics/intrinsics.factor | 1 + basis/compiler/codegen/codegen.factor | 1 + .../tree/propagation/simd/simd.factor | 1 + basis/cpu/architecture/architecture.factor | 2 ++ basis/cpu/ppc/ppc.factor | 1 + basis/cpu/x86/x86.factor | 22 +++++++++++++++++++ .../vectors/simd/intrinsics/intrinsics.factor | 2 ++ .../specialization/specialization.factor | 3 ++- 9 files changed, 37 insertions(+), 1 deletion(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index aefa155ec5..1494348179 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -297,6 +297,11 @@ def: dst use: src literal: shuffle rep ; +PURE-INSN: ##compare-vector +def: dst +use: src1 src2 +literal: rep cc ; + PURE-INSN: ##add-vector def: dst use: src1 src2 diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 76dace1f28..0b565b686c 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -171,6 +171,7 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= ^^compare-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] } diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index b0307f685d..98d1041772 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -166,6 +166,7 @@ CODEGEN: ##zero-vector %zero-vector CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##shuffle-vector %shuffle-vector +CODEGEN: ##compare-vector %compare-vector CODEGEN: ##box-vector %box-vector CODEGEN: ##add-vector %add-vector CODEGEN: ##saturated-add-vector %saturated-add-vector diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index e2c2b15f2d..35ce810df0 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -25,6 +25,7 @@ IN: compiler.tree.propagation.simd (simd-hlshift) (simd-hrshift) (simd-vshuffle) + (simd-v=) (simd-with) (simd-gather-2) (simd-gather-4) diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 3b1f57d08e..629579ab1e 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -223,6 +223,7 @@ HOOK: %zero-vector cpu ( dst rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) +HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- ) HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- ) HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- ) @@ -256,6 +257,7 @@ HOOK: %zero-vector-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %shuffle-vector-reps cpu ( -- reps ) +HOOK: %compare-vector-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps ) HOOK: %saturated-add-vector-reps cpu ( -- reps ) HOOK: %add-sub-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index de37cd6ee3..81064491c1 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -267,6 +267,7 @@ M: ppc %zero-vector-reps { } ; M: ppc %gather-vector-2-reps { } ; M: ppc %gather-vector-4-reps { } ; M: ppc %shuffle-vector-reps { } ; +M: ppc %compare-vector-reps { } ; M: ppc %add-vector-reps { } ; M: ppc %saturated-add-vector-reps { } ; M: ppc %add-sub-vector-reps { } ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 3c20064313..85fed02429 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -725,6 +725,28 @@ M: x86 %shuffle-vector-reps { sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; +: %compare-vector-equal ( dst src rep -- ) + unsign-rep { + { double-2-rep [ CMPEQPD ] } + { float-4-rep [ CMPEQPS ] } + { longlong-2-rep [ PCMPEQQ ] } + { int-4-rep [ PCMPEQD ] } + { short-8-rep [ PCMPEQW ] } + { char-16-rep [ PCMPEQB ] } + } case ; + +M: x86 %compare-vector ( dst src1 src2 rep cc -- ) + [ [ two-operand ] keep ] dip { + { cc= [ %compare-vector-equal ] } + } case ; + +M: x86 %compare-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + { sse4.1? { longlong-2-rep ulonglong-2-rep } } + } available-reps ; + M: x86 %add-vector ( dst src1 src2 rep -- ) [ two-operand ] keep { diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 6008a20844..b5cb9cf634 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -49,6 +49,7 @@ SIMD-OP: vrshift SIMD-OP: hlshift SIMD-OP: hrshift SIMD-OP: vshuffle +SIMD-OP: v= : (simd-with) ( x rep -- v ) bad-simd-call ; : (simd-gather-2) ( a b rep -- v ) bad-simd-call ; @@ -126,6 +127,7 @@ M: vector-rep supported-simd-op? { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } { \ (simd-vshuffle) [ %shuffle-vector-reps ] } + { \ (simd-v=) [ %compare-vector-reps ] } { \ (simd-gather-2) [ %gather-vector-2-reps ] } { \ (simd-gather-4) [ %gather-vector-4-reps ] } } case member? ; diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index b07615bfc9..e1a4c00153 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -92,6 +92,7 @@ H{ { hrshift { +vector+ +literal+ -> +vector+ } } { vshuffle { +vector+ +literal+ -> +vector+ } } { vbroadcast { +vector+ +literal+ -> +vector+ } } + { v= { +vector+ +vector+ -> +vector+ } } } PREDICATE: vector-word < word vector-words key? ; @@ -162,4 +163,4 @@ ERROR: bad-vector-word word ; vector-words keys [ [ vector-word-custom-inlining ] "custom-inlining" set-word-prop -] each \ No newline at end of file +] each From d14f150b587bff3d48829845e02eec264fc7a79e Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 15:35:38 -0500 Subject: [PATCH 08/19] %test-vector instruction for vany?, vall?, vnone? --- .../cfg/comparisons/comparisons.factor | 3 +++ .../cfg/instructions/instructions.factor | 6 ++++++ .../compiler/cfg/intrinsics/intrinsics.factor | 3 +++ basis/compiler/codegen/codegen.factor | 1 + .../tree/propagation/simd/simd.factor | 6 ++++++ basis/cpu/architecture/architecture.factor | 2 ++ basis/cpu/ppc/ppc.factor | 1 + basis/cpu/x86/x86.factor | 21 +++++++++++++++++++ .../vectors/simd/intrinsics/intrinsics.factor | 6 ++++++ .../specialization/specialization.factor | 3 +++ 10 files changed, 52 insertions(+) diff --git a/basis/compiler/cfg/comparisons/comparisons.factor b/basis/compiler/cfg/comparisons/comparisons.factor index e7c19e7206..d538ee818c 100644 --- a/basis/compiler/cfg/comparisons/comparisons.factor +++ b/basis/compiler/cfg/comparisons/comparisons.factor @@ -9,6 +9,9 @@ SYMBOLS: cc< cc<= cc= cc> cc>= cc<> cc<>= cc/< cc/<= cc/= cc/> cc/>= cc/<> cc/<>= ; +SYMBOLS: + vcc-all vcc-any vcc-none ; + : negate-cc ( cc -- cc' ) H{ { cc< cc/< } diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 1494348179..09e7736235 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -302,6 +302,12 @@ def: dst use: src1 src2 literal: rep cc ; +PURE-INSN: ##test-vector +def: dst/int-rep +use: src +temp: temp/int-rep +literal: rep vcc ; + PURE-INSN: ##add-vector def: dst use: src1 src2 diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 0b565b686c..d8f34b4164 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -172,6 +172,9 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= ^^compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vlshift) [ [ ^^shl-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vrshift) [ [ ^^shr-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-hlshift) [ [ ^^horizontal-shl-vector ] emit-horizontal-shift ] } diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 98d1041772..7c4c593d16 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -167,6 +167,7 @@ CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##shuffle-vector %shuffle-vector CODEGEN: ##compare-vector %compare-vector +CODEGEN: ##test-vector %test-vector CODEGEN: ##box-vector %box-vector CODEGEN: ##add-vector %add-vector CODEGEN: ##saturated-add-vector %saturated-add-vector diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 35ce810df0..c8be614886 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -46,6 +46,12 @@ IN: compiler.tree.propagation.simd \ (simd-v.) [ 2nip scalar-output-class ] "outputs" set-word-prop +{ + (simd-vany?) + (simd-vall?) + (simd-vnone?) +} [ { boolean } "default-output-classes" set-word-prop ] each + \ (simd-select) [ 2nip scalar-output-class ] "outputs" set-word-prop \ assert-positive [ diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 629579ab1e..ffe77671a7 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -224,6 +224,7 @@ HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- ) +HOOK: %test-vector cpu ( dst src temp rep vcc -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- ) HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- ) HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- ) @@ -258,6 +259,7 @@ HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %shuffle-vector-reps cpu ( -- reps ) HOOK: %compare-vector-reps cpu ( -- reps ) +HOOK: %test-vector-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps ) HOOK: %saturated-add-vector-reps cpu ( -- reps ) HOOK: %add-sub-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index 81064491c1..cfad8259d8 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -268,6 +268,7 @@ M: ppc %gather-vector-2-reps { } ; M: ppc %gather-vector-4-reps { } ; M: ppc %shuffle-vector-reps { } ; M: ppc %compare-vector-reps { } ; +M: ppc %test-vector-reps { } ; M: ppc %add-vector-reps { } ; M: ppc %saturated-add-vector-reps { } ; M: ppc %add-sub-vector-reps { } ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 85fed02429..877afaa390 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -747,6 +747,27 @@ M: x86 %compare-vector-reps { sse4.1? { longlong-2-rep ulonglong-2-rep } } } available-reps ; +:: (%test-vector) ( dst temp mask vcc -- ) + vcc { + { vcc-any [ dst dst TEST dst temp \ CMOVNE %boolean ] } + { vcc-none [ dst dst TEST dst temp \ CMOVE %boolean ] } + { vcc-all [ dst mask CMP dst temp \ CMOVE %boolean ] } + } case ; + +M:: x86 %test-vector ( dst src temp rep vcc -- ) + dst src rep { + { double-2-rep [ MOVMSKPD HEX: 3 ] } + { float-4-rep [ MOVMSKPS HEX: f ] } + [ drop PMOVMSKB HEX: ffff ] + } case :> mask + dst temp mask vcc (%test-vector) ; + +M: x86 %test-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + } available-reps ; + M: x86 %add-vector ( dst src1 src2 rep -- ) [ two-operand ] keep { diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index b5cb9cf634..cbdbade222 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -50,6 +50,9 @@ SIMD-OP: hlshift SIMD-OP: hrshift SIMD-OP: vshuffle SIMD-OP: v= +SIMD-OP: vany? +SIMD-OP: vall? +SIMD-OP: vnone? : (simd-with) ( x rep -- v ) bad-simd-call ; : (simd-gather-2) ( a b rep -- v ) bad-simd-call ; @@ -130,4 +133,7 @@ M: vector-rep supported-simd-op? { \ (simd-v=) [ %compare-vector-reps ] } { \ (simd-gather-2) [ %gather-vector-2-reps ] } { \ (simd-gather-4) [ %gather-vector-4-reps ] } + { \ (simd-vany?) [ %test-vector-reps ] } + { \ (simd-vall?) [ %test-vector-reps ] } + { \ (simd-vnone?) [ %test-vector-reps ] } } case member? ; diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index e1a4c00153..ffb148f55d 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -93,6 +93,9 @@ H{ { vshuffle { +vector+ +literal+ -> +vector+ } } { vbroadcast { +vector+ +literal+ -> +vector+ } } { v= { +vector+ +vector+ -> +vector+ } } + { vany? { +vector+ -> +scalar+ } } + { vall? { +vector+ -> +scalar+ } } + { vnone? { +vector+ -> +scalar+ } } } PREDICATE: vector-word < word vector-words key? ; From f9695951a072e689d5591798e06a1dd88d6b369c Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 19:53:30 -0500 Subject: [PATCH 09/19] fold test-vector/branch sequences into a test-vector-branch instruction --- .../cfg/comparisons/comparisons.factor | 10 +++++- .../cfg/instructions/instructions.factor | 13 +++++++- .../cfg/linearization/linearization.factor | 23 ++++++++++--- .../value-numbering/rewrite/rewrite.factor | 10 ++++++ .../value-numbering-tests.factor | 18 +++++++++++ basis/compiler/codegen/codegen.factor | 1 + basis/cpu/architecture/architecture.factor | 3 +- basis/cpu/x86/x86.factor | 32 ++++++++++++++----- 8 files changed, 95 insertions(+), 15 deletions(-) diff --git a/basis/compiler/cfg/comparisons/comparisons.factor b/basis/compiler/cfg/comparisons/comparisons.factor index d538ee818c..0b4a6f2f02 100644 --- a/basis/compiler/cfg/comparisons/comparisons.factor +++ b/basis/compiler/cfg/comparisons/comparisons.factor @@ -10,7 +10,7 @@ SYMBOLS: cc/< cc/<= cc/= cc/> cc/>= cc/<> cc/<>= ; SYMBOLS: - vcc-all vcc-any vcc-none ; + vcc-all vcc-notall vcc-any vcc-none ; : negate-cc ( cc -- cc' ) H{ @@ -30,6 +30,14 @@ SYMBOLS: { cc/<>= cc<>= } } at ; +: negate-vcc ( cc -- cc' ) + H{ + { vcc-all vcc-notall } + { vcc-any vcc-none } + { vcc-none vcc-any } + { vcc-notall vcc-all } + } at ; + : swap-cc ( cc -- cc' ) H{ { cc< cc> } diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 09e7736235..d40aabcb19 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -304,7 +304,18 @@ literal: rep cc ; PURE-INSN: ##test-vector def: dst/int-rep -use: src +use: src1 +temp: temp/int-rep +literal: rep vcc ; + +INSN: ##test-vector-branch +use: src1 +temp: temp/int-rep +literal: rep vcc ; + +INSN: _test-vector-branch +literal: label +use: src1 temp: temp/int-rep literal: rep vcc ; diff --git a/basis/compiler/cfg/linearization/linearization.factor b/basis/compiler/cfg/linearization/linearization.factor index 66ac1addb0..31a4247206 100755 --- a/basis/compiler/cfg/linearization/linearization.factor +++ b/basis/compiler/cfg/linearization/linearization.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel math accessors sequences namespaces make combinators assocs arrays locals layouts hashtables -cpu.architecture +cpu.architecture generalizations compiler.cfg compiler.cfg.comparisons compiler.cfg.stack-frame @@ -42,14 +42,26 @@ M: ##branch linearize-insn : successors ( bb -- first second ) successors>> first2 ; inline +:: conditional ( bb insn n conditional-quot negate-cc-quot -- bb successor label ... ) + bb insn + conditional-quot + [ drop dup successors>> second useless-branch? ] 2bi + [ [ swap block-number ] n ndip ] + [ [ block-number ] n ndip negate-cc-quot call ] if ; inline + : (binary-conditional) ( bb insn -- bb successor1 successor2 src1 src2 cc ) [ dup successors ] [ [ src1>> ] [ src2>> ] [ cc>> ] tri ] bi* ; inline : binary-conditional ( bb insn -- bb successor label2 src1 src2 cc ) - [ (binary-conditional) ] - [ drop dup successors>> second useless-branch? ] 2bi - [ [ swap block-number ] 3dip ] [ [ block-number ] 3dip negate-cc ] if ; + 3 [ (binary-conditional) ] [ negate-cc ] conditional ; + +: (test-vector-conditional) ( bb insn -- bb successor1 successor2 src1 temp rep vcc ) + [ dup successors ] + [ { [ src1>> ] [ temp>> ] [ rep>> ] [ vcc>> ] } cleave ] bi* ; inline + +: test-vector-conditional ( bb insn -- bb successor label src1 temp rep vcc ) + 4 [ (test-vector-conditional) ] [ negate-vcc ] conditional ; M: ##compare-branch linearize-insn binary-conditional _compare-branch emit-branch ; @@ -63,6 +75,9 @@ M: ##compare-float-ordered-branch linearize-insn M: ##compare-float-unordered-branch linearize-insn binary-conditional _compare-float-unordered-branch emit-branch ; +M: ##test-vector-branch linearize-insn + test-vector-conditional _test-vector-branch emit-branch ; + : overflow-conditional ( bb insn -- bb successor label2 dst src1 src2 ) [ dup successors block-number ] [ [ dst>> ] [ src1>> ] [ src2>> ] tri ] bi* ; inline diff --git a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor index 8e5e013606..9827e02bf5 100755 --- a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor +++ b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor @@ -40,6 +40,7 @@ M: insn rewrite drop f ; [ compare-imm-expr? ] [ compare-float-unordered-expr? ] [ compare-float-ordered-expr? ] + [ test-vector-expr? ] } 1|| ; : rewrite-boolean-comparison? ( insn -- ? ) @@ -53,12 +54,21 @@ M: insn rewrite drop f ; : >compare-imm-expr< ( expr -- in1 in2 cc ) [ src1>> vn>vreg ] [ src2>> vn>constant ] [ cc>> ] tri ; inline +: >test-vector-expr< ( expr -- src1 temp rep vcc ) + { + [ src1>> vn>vreg ] + [ drop next-vreg ] + [ rep>> ] + [ vcc>> ] + } cleave ; inline + : rewrite-boolean-comparison ( expr -- insn ) src1>> vreg>expr { { [ dup compare-expr? ] [ >compare-expr< \ ##compare-branch new-insn ] } { [ dup compare-imm-expr? ] [ >compare-imm-expr< \ ##compare-imm-branch new-insn ] } { [ dup compare-float-unordered-expr? ] [ >compare-expr< \ ##compare-float-unordered-branch new-insn ] } { [ dup compare-float-ordered-expr? ] [ >compare-expr< \ ##compare-float-ordered-branch new-insn ] } + { [ dup test-vector-expr? ] [ >test-vector-expr< \ ##test-vector-branch new-insn ] } } cond ; : tag-fixnum-expr? ( expr -- ? ) diff --git a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor index b2750da3fa..ba33885aac 100644 --- a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor +++ b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor @@ -14,6 +14,8 @@ IN: compiler.cfg.value-numbering.tests [ ##compare-imm? ] [ ##compare-float-unordered? ] [ ##compare-float-ordered? ] + [ ##test-vector? ] + [ ##test-vector-branch? ] } 1|| [ f >>temp ] when ] map ; @@ -141,6 +143,22 @@ IN: compiler.cfg.value-numbering.tests } value-numbering-step trim-temps ] unit-test +[ + { + T{ ##peek f 1 D -1 } + T{ ##unbox-vector f 1111 1 float-4-rep } + T{ ##test-vector f 1 1111 f float-4-rep vcc-any } + T{ ##test-vector-branch f 1111 f float-4-rep vcc-any } + } +] [ + { + T{ ##peek f 1 D -1 } + T{ ##unbox-vector f 1111 1 float-4-rep } + T{ ##test-vector f 1 1111 2 float-4-rep vcc-any } + T{ ##compare-imm-branch f 1 5 cc/= } + } value-numbering-step trim-temps +] unit-test + ! Immediate operand conversion [ { diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 7c4c593d16..839e1aef05 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -235,6 +235,7 @@ CODEGEN: _compare-branch %compare-branch CODEGEN: _compare-imm-branch %compare-imm-branch CODEGEN: _compare-float-ordered-branch %compare-float-ordered-branch CODEGEN: _compare-float-unordered-branch %compare-float-unordered-branch +CODEGEN: _test-vector-branch %test-vector-branch CODEGEN: _dispatch %dispatch CODEGEN: _spill %spill CODEGEN: _reload %reload diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index ffe77671a7..41b3fed08d 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -224,7 +224,8 @@ HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- ) -HOOK: %test-vector cpu ( dst src temp rep vcc -- ) +HOOK: %test-vector cpu ( dst src1 temp rep vcc -- ) +HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- ) HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- ) HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 877afaa390..cca8e617ca 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -747,20 +747,36 @@ M: x86 %compare-vector-reps { sse4.1? { longlong-2-rep ulonglong-2-rep } } } available-reps ; -:: (%test-vector) ( dst temp mask vcc -- ) +:: %test-vector-mask ( dst temp mask vcc -- ) vcc { - { vcc-any [ dst dst TEST dst temp \ CMOVNE %boolean ] } - { vcc-none [ dst dst TEST dst temp \ CMOVE %boolean ] } - { vcc-all [ dst mask CMP dst temp \ CMOVE %boolean ] } + { vcc-any [ dst dst TEST dst temp \ CMOVNE %boolean ] } + { vcc-none [ dst dst TEST dst temp \ CMOVE %boolean ] } + { vcc-all [ dst mask CMP dst temp \ CMOVE %boolean ] } + { vcc-notall [ dst mask CMP dst temp \ CMOVNE %boolean ] } } case ; -M:: x86 %test-vector ( dst src temp rep vcc -- ) - dst src rep { +: %move-vector-mask ( dst src rep -- mask ) + { { double-2-rep [ MOVMSKPD HEX: 3 ] } { float-4-rep [ MOVMSKPS HEX: f ] } [ drop PMOVMSKB HEX: ffff ] - } case :> mask - dst temp mask vcc (%test-vector) ; + } case ; + +M:: x86 %test-vector ( dst src temp rep vcc -- ) + dst src rep %move-vector-mask :> mask + dst temp mask vcc %test-vector-mask ; + +:: %test-vector-mask-branch ( label temp mask vcc -- ) + vcc { + { vcc-any [ temp temp TEST label JNE ] } + { vcc-none [ temp temp TEST label JE ] } + { vcc-all [ temp mask CMP label JE ] } + { vcc-notall [ temp mask CMP label JNE ] } + } case ; + +M:: x86 %test-vector-branch ( label src temp rep vcc -- ) + temp src rep %move-vector-mask :> mask + label temp mask vcc %test-vector-mask-branch ; M: x86 %test-vector-reps { From d5c4ec53571df8e7e658ccb3d83a648c94574f8c Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 21:24:14 -0500 Subject: [PATCH 10/19] add tests for v=, vany?, vall?, vnone? --- basis/math/vectors/simd/simd-tests.factor | 128 +++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index c676b9fe98..4ce1befdbc 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -161,7 +161,10 @@ CONSTANT: simd-classes : remove-special-words ( alist -- alist' ) ! These have their own tests later - { hlshift hrshift vshuffle vbroadcast } unique assoc-diff ; + { + hlshift hrshift vshuffle vbroadcast + v= vany? vall? vnone? + } unique assoc-diff ; : ops-to-check ( elt-class -- alist ) [ vector-words >alist ] dip @@ -281,6 +284,129 @@ simd-classes [ ] unit-test ] each +"== Checking element tests" print + +[ { t f t f f f t f } ] +[ short-8{ 1 2 3 4 5 6 7 8 } short-8{ 1 0 3 -1 -2 -3 7 -4 } v= ] unit-test + +[ short-8{ t f t f f f t f } ] +[ short-8{ 1 2 3 4 5 6 7 8 } short-8{ 1 0 3 -1 -2 -3 7 -4 } [ { short-8 short-8 } declare v= ] compile-call ] unit-test + +[ { t f t f f f t f } ] +[ int-8{ 1 2 3 4 5 6 7 8 } int-8{ 1 0 3 -1 -2 -3 7 -4 } v= ] unit-test + +[ int-8{ t f t f f f t f } ] +[ int-8{ 1 2 3 4 5 6 7 8 } int-8{ 1 0 3 -1 -2 -3 7 -4 } [ { int-8 int-8 } declare v= ] compile-call ] unit-test + +[ int-4{ t f t f } ] +[ int-4{ 1 2 3 4 } int-4{ 1 0 3 -1 } [ { int-4 int-4 } declare v= ] compile-call ] unit-test + +[ { t f t f } ] +[ int-4{ 1 2 3 4 } int-4{ 1 0 3 -1 } v= ] unit-test + +[ int-4{ t f t f } ] +[ int-4{ 1 2 3 4 } int-4{ 1 0 3 -1 } [ { int-4 int-4 } declare v= ] compile-call ] unit-test + +[ { t f t f } ] +[ float-4{ 1.0 0/0. 3.0 4.0 } float-4{ 1.0 0/0. 3.0 -1.0 } v= ] unit-test + +[ t ] +[ + float-4{ t f t f } + float-4{ 1.0 0/0. 3.0 4.0 } float-4{ 1.0 0/0. 3.0 -1.0 } [ { float-4 float-4 } declare v= ] compile-call + exact= +] unit-test + +[ { t f t f f t t t } ] +[ float-8{ 1.0 0/0. 3.0 4.0 5.0 6.0 7.0 8.0 } float-8{ 1.0 0/0. 3.0 -1.0 -2.0 6.0 7.0 8.0 } v= ] unit-test + +[ t ] +[ + float-8{ t f t f f t t t } + float-8{ 1.0 0/0. 3.0 4.0 5.0 6.0 7.0 8.0 } float-8{ 1.0 0/0. 3.0 -1.0 -2.0 6.0 7.0 8.0 } [ { float-8 float-8 } declare v= ] compile-call + exact= +] unit-test + +[ { f t } ] +[ double-2{ 0/0. 3.0 } double-2{ 0/0. 3.0 } v= ] unit-test + +[ t ] +[ + double-2{ f t } + double-2{ 0/0. 3.0 } double-2{ 0/0. 3.0 } [ { double-2 double-2 } declare v= ] compile-call + exact= +] unit-test + +:: test-vector-tests-bool ( vector declaration -- none? any? all? ) + vector + [ [ declaration declare vnone? ] compile-call ] + [ [ declaration declare vany? ] compile-call ] + [ [ declaration declare vall? ] compile-call ] tri ; inline + +: yes ( -- x ) t ; +: no ( -- x ) f ; + +:: test-vector-tests-branch ( vector declaration -- none? any? all? ) + vector + [ [ declaration declare vnone? [ yes ] [ no ] if ] compile-call ] + [ [ declaration declare vany? [ yes ] [ no ] if ] compile-call ] + [ [ declaration declare vall? [ yes ] [ no ] if ] compile-call ] tri ; inline + +SYMBOL: !!inconsistent!! + +: ?inconsistent ( a b -- ab/inconsistent ) + 2dup = [ drop ] [ 2drop !!inconsistent!! ] if ; + +:: test-vector-tests ( vector decl -- none? any? all? ) + vector decl test-vector-tests-bool :> bool-all :> bool-any :> bool-none + vector decl test-vector-tests-branch :> branch-all :> branch-any :> branch-none + + bool-none branch-none ?inconsistent + bool-any branch-any ?inconsistent + bool-all branch-all ?inconsistent ; inline + +[ f t t ] +[ float-4{ t t t t } { float-4 } test-vector-tests ] unit-test +[ f t f ] +[ float-4{ f t t t } { float-4 } test-vector-tests ] unit-test +[ t f f ] +[ float-4{ f f f f } { float-4 } test-vector-tests ] unit-test + +[ f t t ] +[ double-2{ t t } { double-2 } test-vector-tests ] unit-test +[ f t f ] +[ double-2{ f t } { double-2 } test-vector-tests ] unit-test +[ t f f ] +[ double-2{ f f } { double-2 } test-vector-tests ] unit-test + +[ f t t ] +[ int-4{ t t t t } { int-4 } test-vector-tests ] unit-test +[ f t f ] +[ int-4{ f t t t } { int-4 } test-vector-tests ] unit-test +[ t f f ] +[ int-4{ f f f f } { int-4 } test-vector-tests ] unit-test + +[ f t t ] +[ float-8{ t t t t t t t t } { float-8 } test-vector-tests ] unit-test +[ f t f ] +[ float-8{ f t t t t f t t } { float-8 } test-vector-tests ] unit-test +[ t f f ] +[ float-8{ f f f f f f f f } { float-8 } test-vector-tests ] unit-test + +[ f t t ] +[ double-4{ t t t t } { double-4 } test-vector-tests ] unit-test +[ f t f ] +[ double-4{ f t t f } { double-4 } test-vector-tests ] unit-test +[ t f f ] +[ double-4{ f f f f } { double-4 } test-vector-tests ] unit-test + +[ f t t ] +[ int-8{ t t t t t t t t } { int-8 } test-vector-tests ] unit-test +[ f t f ] +[ int-8{ f t t t t f f f } { int-8 } test-vector-tests ] unit-test +[ t f f ] +[ int-8{ f f f f f f f f } { int-8 } test-vector-tests ] unit-test + "== Checking element access" print ! Test element access -- it should box bignums for int-4 on x86 From 72986dc66d0bf2cd81f0d3579003b63bdd646018 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 22:42:28 -0500 Subject: [PATCH 11/19] vim: highlight @ and _, don't highlight things that aren't special anymore --- misc/vim/syntax/factor.vim | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/misc/vim/syntax/factor.vim b/misc/vim/syntax/factor.vim index 52e5825c7c..9161c14e12 100644 --- a/misc/vim/syntax/factor.vim +++ b/misc/vim/syntax/factor.vim @@ -23,7 +23,7 @@ else set iskeyword=!,@,33-35,%,$,38-64,A-Z,91-96,a-z,123-126,128-255 endif -syn cluster factorCluster contains=factorComment,factorKeyword,factorRepeat,factorConditional,factorBoolean,factorCompileDirective,factorString,factorTriString,factorSbuf,@factorNumber,@factorNumErr,factorDelimiter,factorChar,factorCharErr,factorBackslash,@factorWordOps,factorAlien,factorTuple,factorStruct +syn cluster factorCluster contains=factorComment,factorFryDirective,factorKeyword,factorRepeat,factorConditional,factorBoolean,factorCompileDirective,factorString,factorTriString,factorSbuf,@factorNumber,@factorNumErr,factorDelimiter,factorChar,factorCharErr,factorBackslash,@factorWordOps,factorAlien,factorTuple,factorStruct syn match factorTodo /\(TODO\|FIXME\|XXX\):\=/ contained syn match factorComment /\<#!\>.*/ contains=factorTodo @@ -44,9 +44,11 @@ syn region factorPGenericN matchgroup=factorPGenericNDelims start=/\/ end=/\\>/ contains=@factorDefnContents,factorPrivateDefn,factorPrivateMethod,factorPGeneric,factorPGenericN -syn keyword factorBoolean boolean f general-t t +syn keyword factorBoolean f t +syn match factorFryDirective /\<\(@\|_\)\>/ contained syn keyword factorCompileDirective inline foldable recursive +syn keyword factorKeyword boolean syn keyword factorKeyword or tuck 2bi 2tri while wrapper nip 4dip wrapper? bi* callstack>array both? hashcode die dupd callstack callstack? 3dup tri@ pick curry build ?execute 3bi prepose >boolean ?if clone eq? tri* ? = swapd call-clear 2over 2keep 3keep clear 2dup when not tuple? dup 2bi* 2tri* call tri-curry object bi@ do unless* if* loop bi-curry* drop when* assert= retainstack assert? -rot execute 2bi@ 2tri@ boa with either? 3drop bi curry? datastack until 3dip over 3curry roll tri-curry* swap tri-curry@ 2nip and throw set-retainstack bi-curry (clone) hashcode* compose spin 2dip if 3tri unless compose? tuple keep 2curry equal? set-datastack assert tri 2drop most boolean? identity-tuple? null new set-callstack dip bi-curry@ rot -roll xor identity-tuple boolean syn keyword factorKeyword ?at assoc? assoc-clone-like assoc= delete-at* assoc-partition extract-keys new-assoc value? assoc-size map>assoc push-at assoc-like key? assoc-intersect assoc-refine update assoc-union assoc-combine at* assoc-empty? at+ set-at assoc-all? assoc-subset? assoc-hashcode change-at assoc-each assoc-diff zip values value-at rename-at inc-at enum? at cache assoc>map assoc assoc-map enum value-at* remove-all assoc-map-as >alist assoc-filter-as substitute-here clear-assoc assoc-stack maybe-set-at substitute assoc-filter 2cache delete-at assoc-find keys assoc-any? unzip syn keyword factorKeyword case execute-effect no-cond no-case? 3cleave>quot 2cleave cond>quot wrong-values? no-cond? cleave>quot no-case case>quot 3cleave wrong-values to-fixed-point alist>quot case-find cond cleave call-effect 2cleave>quot recursive-hashcode linear-case-quot spread spread>quot @@ -190,6 +192,7 @@ if version >= 508 || !exists("did_factor_syn_inits") HiLink factorConditional Conditional HiLink factorKeyword Keyword HiLink factorOperator Operator + HiLink factorFryDirective Operator HiLink factorBoolean Boolean HiLink factorDefnDelims Typedef HiLink factorMethodDelims Typedef From 31f39ce32fb91f7de31b33d666a89e4d74fe1968 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 1 Oct 2009 23:03:17 -0500 Subject: [PATCH 12/19] compiler.cfg.alias-analysis: optimize ##vm-field-ptr and ##alien-global instructions, and optimize out ##compare between values of different alias classes; this optimizes '[ [ >float ] bi@ [ + ] [ - ] 2bi eq? ]' down to an o-op and removes boxing from '[ [ >float ] bi@ [ + ] [ - ] 2bi = ]' --- .../cfg/alias-analysis/alias-analysis.factor | 56 ++++++++++++++++--- .../cfg/instructions/instructions.factor | 2 +- .../cfg/write-barrier/write-barrier.factor | 2 +- 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/basis/compiler/cfg/alias-analysis/alias-analysis.factor b/basis/compiler/cfg/alias-analysis/alias-analysis.factor index 680ce42259..6e915ebcb4 100644 --- a/basis/compiler/cfg/alias-analysis/alias-analysis.factor +++ b/basis/compiler/cfg/alias-analysis/alias-analysis.factor @@ -1,10 +1,17 @@ ! Copyright (C) 2008, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: kernel math namespaces assocs hashtables sequences arrays -accessors vectors combinators sets classes cpu.architecture -compiler.cfg compiler.cfg.registers compiler.cfg.instructions -compiler.cfg.def-use compiler.cfg.copy-prop compiler.cfg.rpo -compiler.cfg.liveness ; +accessors words vectors combinators combinators.short-circuit +sets classes layouts cpu.architecture +compiler.cfg +compiler.cfg.rpo +compiler.cfg.def-use +compiler.cfg.liveness +compiler.cfg.copy-prop +compiler.cfg.registers +compiler.cfg.comparisons +compiler.cfg.instructions +compiler.cfg.representations.preferred ; IN: compiler.cfg.alias-analysis ! We try to eliminate redundant slot operations using some simple heuristics. @@ -77,10 +84,15 @@ SYMBOL: acs>vregs : ac>vregs ( ac -- vregs ) acs>vregs get at ; -: aliases ( vreg -- vregs ) +GENERIC: aliases ( vreg -- vregs ) + +M: integer aliases #! All vregs which may contain the same value as vreg. vreg>ac ac>vregs ; +M: word aliases + 1array ; + : each-alias ( vreg quot -- ) [ aliases ] dip each ; inline @@ -181,7 +193,6 @@ SYMBOL: constants #! assigned by an ##load-immediate. resolve constants get at ; -! We treat slot accessors and stack traffic alike GENERIC: insn-slot# ( insn -- slot#/f ) GENERIC: insn-object ( insn -- vreg ) @@ -190,7 +201,7 @@ M: ##slot-imm insn-slot# slot>> ; M: ##set-slot insn-slot# slot>> constant ; M: ##set-slot-imm insn-slot# slot>> ; M: ##alien-global insn-slot# [ library>> ] [ symbol>> ] bi 2array ; -M: ##vm-field-ptr insn-slot# field-name>> ; ! is this right? +M: ##vm-field-ptr insn-slot# field-name>> ; M: ##slot insn-object obj>> resolve ; M: ##slot-imm insn-object obj>> resolve ; @@ -206,18 +217,33 @@ M: ##vm-field-ptr insn-object drop \ ##vm-field-ptr ; H{ } clone live-slots set H{ } clone constants set H{ } clone copies set - + 0 ac-counter set next-ac heap-ac set + \ ##vm-field-ptr set-new-ac + \ ##alien-global set-new-ac + dup local-live-in [ set-heap-ac ] each ; GENERIC: analyze-aliases* ( insn -- insn' ) M: insn analyze-aliases* - dup defs-vreg [ set-heap-ac ] when* ; + ! If an instruction defines a value with a non-integer + ! representation it means that the value will be boxed + ! anywhere its used as a tagged pointer. Boxing allocates + ! a new value, except boxing instructions haven't been + ! inserted yet. + dup defs-vreg [ + over defs-vreg-rep int-rep eq? + [ set-heap-ac ] [ set-new-ac ] if + ] when* ; + +M: ##phi analyze-aliases* + dup defs-vreg set-heap-ac ; M: ##load-immediate analyze-aliases* + call-next-method dup [ val>> ] [ dst>> ] bi constants get set-at ; M: ##allocation analyze-aliases* @@ -249,6 +275,18 @@ M: ##copy analyze-aliases* #! vreg, since they both contain the same value. dup record-copy ; +: useless-compare? ( insn -- ? ) + { + [ cc>> cc= eq? ] + [ [ src1>> vreg>ac ] [ src2>> vreg>ac ] bi = not ] + } 1&& ; inline + +M: ##compare analyze-aliases* + dup useless-compare? [ + dst>> \ f tag-number \ ##load-immediate new-insn + analyze-aliases* + ] when ; + : analyze-aliases ( insns -- insns' ) [ insn# set analyze-aliases* ] map-index sift ; diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index e69516dded..7b2d8ef9b8 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -721,7 +721,7 @@ UNION: ##allocation ##box-displaced-alien ; ! For alias analysis -UNION: ##read ##slot ##slot-imm ; +UNION: ##read ##slot ##slot-imm ##vm-field-ptr ##alien-global ; UNION: ##write ##set-slot ##set-slot-imm ; ! Instructions that kill all live vregs but cannot trigger GC diff --git a/basis/compiler/cfg/write-barrier/write-barrier.factor b/basis/compiler/cfg/write-barrier/write-barrier.factor index 97b0c27af1..778d0526d5 100644 --- a/basis/compiler/cfg/write-barrier/write-barrier.factor +++ b/basis/compiler/cfg/write-barrier/write-barrier.factor @@ -75,7 +75,7 @@ M: insn remove-dead-barrier drop t ; ! Anticipation of this and set-slot would help too, maybe later FORWARD-ANALYSIS: slot -UNION: access ##read ##write ; +UNION: access ##slot ##slot-imm ##set-slot ##set-slot-imm ; M: slot-analysis transfer-set drop [ H{ } assoc-clone-like ] dip From 952498ef691d9e1e2dc633f5b6103d02755dac8a Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 23:07:10 -0500 Subject: [PATCH 13/19] create special intrinsic wrappers for 256-vector>scalar operations so that vall?, vany?, vnone? work on 256-vectors --- .../math/vectors/simd/functor/functor.factor | 67 ++++++++++++------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 6a7771c2c3..6439069fc7 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -5,7 +5,8 @@ functors generalizations kernel literals locals math math.functions math.vectors math.vectors.private math.vectors.simd.intrinsics math.vectors.specialization parser prettyprint.custom sequences sequences.private strings words definitions macros cpu.architecture -namespaces arrays quotations combinators sets layouts ; +namespaces arrays quotations combinators combinators.short-circuit sets +layouts ; QUALIFIED-WITH: alien.c-types c IN: math.vectors.simd.functor @@ -95,14 +96,17 @@ MACRO: simd-nth ( rep -- x ) '[ nip _ swap supported-simd-op? ] assoc-filter '[ drop _ key? ] assoc-filter ; -ERROR: bad-schema schema ; +ERROR: bad-schema op schema ; -: low-level-ops ( simd-ops alist -- alist' ) - '[ - 1quotation - over word-schema _ ?at [ bad-schema ] unless - [ ] 2sequence - ] assoc-map ; +:: op-wrapper ( op specials schemas -- wrapper ) + op { + [ specials at ] + [ word-schema schemas at ] + [ dup word-schema bad-schema ] + } 1|| ; + +: low-level-ops ( simd-ops specials schemas -- alist ) + '[ 1quotation over _ _ op-wrapper [ ] 2sequence ] assoc-map ; :: high-level-ops ( ctor elt-class -- assoc ) ! Some SIMD operations are defined in terms of others. @@ -126,14 +130,14 @@ ERROR: bad-schema schema ; ! in the general case. elt-class float = [ { distance [ v- norm ] } suffix ] when ; -TUPLE: simd class elt-class ops wrappers ctor rep ; +TUPLE: simd class elt-class ops special-wrappers schema-wrappers ctor rep ; : define-simd ( simd -- ) dup rep>> rep-component-type c:c-type-boxed-class >>elt-class { [ class>> ] [ elt-class>> ] - [ [ ops>> ] [ wrappers>> ] bi low-level-ops ] + [ [ ops>> ] [ special-wrappers>> ] [ schema-wrappers>> ] tri low-level-ops ] [ rep>> supported-simd-ops ] [ [ ctor>> ] [ elt-class>> ] bi high-level-ops assoc-union ] } cleave @@ -262,7 +266,7 @@ simd new { { +vector+ -> +vector+ } A-v->v-op } { { +vector+ -> +scalar+ } A-v->n-op } { { +vector+ -> +nonnegative+ } A-v->n-op } - } >>wrappers + } >>schema-wrappers (define-simd-128) PRIVATE> @@ -318,9 +322,12 @@ A-deref DEFINES-PRIVATE ${A}-deref A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op -A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op A-v->v-op DEFINES-PRIVATE ${A}-v->v-op -A-v->n-op DEFINES-PRIVATE ${A}-v->n-op +A-v.-op DEFINES-PRIVATE ${A}-v.-op +(A-v->n-op) DEFINES-PRIVATE (${A}-v->v-op) +A-sum-op DEFINES-PRIVATE ${A}-sum-op +A-vany-op DEFINES-PRIVATE ${A}-vany-op +A-vall-op DEFINES-PRIVATE ${A}-vall-op WHERE @@ -393,32 +400,44 @@ INSTANCE: A sequence [ [ [ underlying2>> ] dip A-rep ] dip call ] 3bi \ A boa ; inline -: A-vv->n-op ( v1 v2 quot -- v3 ) - [ [ [ underlying1>> ] bi@ A-rep ] dip call ] - [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi - + ; inline - : A-v->v-op ( v1 combine-quot -- v2 ) [ [ underlying1>> A-rep ] dip call ] [ [ underlying2>> A-rep ] dip call ] 2bi \ A boa ; inline -: A-v->n-op ( v1 combine-quot -- v2 ) - [ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline +: A-v.-op ( v1 v2 quot -- n ) + [ [ [ underlying1>> ] bi@ A-rep ] dip call ] + [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi + + ; inline + +: (A-v->n-op) ( v1 quot reduce-quot -- n ) + '[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ A-rep ] dip call ; inline + +: A-sum-op ( v1 quot -- n ) + [ (simd-v+) ] (A-v->n-op) ; inline + +: A-vany-op ( v1 quot -- n ) + [ (simd-vbitor) ] (A-v->n-op) ; inline +: A-vall-op ( v1 quot -- n ) + [ (simd-vbitand) ] (A-v->n-op) ; inline simd new \ A >>class \ A-with >>ctor \ A-rep >>rep + { + { v. A-v.-op } + { sum A-sum-op } + { vnone? A-vany-op } + { vany? A-vany-op } + { vall? A-vall-op } + } >>special-wrappers { { { +vector+ +vector+ -> +vector+ } A-vv->v-op } { { +vector+ +scalar+ -> +vector+ } A-vn->v-op } { { +vector+ +literal+ -> +vector+ } A-vn->v-op } - { { +vector+ +vector+ -> +scalar+ } A-vv->n-op } { { +vector+ -> +vector+ } A-v->v-op } - { { +vector+ -> +scalar+ } A-v->n-op } - { { +vector+ -> +nonnegative+ } A-v->n-op } - } >>wrappers + } >>schema-wrappers (define-simd-256) ;FUNCTOR From 53b265f6829c373c504b6225ac30beddc85fed37 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 23:10:11 -0500 Subject: [PATCH 14/19] Merge branch 'master' of git://factorcode.org/git/factor Conflicts: basis/compiler/codegen/codegen.factor --- basis/checksums/md5/md5-tests.factor | 9 ++++- .../compiler/cfg/builder/builder-tests.factor | 13 +++++-- .../cfg/gc-checks/gc-checks-tests.factor | 2 +- basis/compiler/cfg/hats/hats.factor | 15 ------- .../cfg/instructions/instructions.factor | 23 ----------- .../cfg/intrinsics/alien/alien.factor | 8 +++- .../cfg/intrinsics/allot/allot.factor | 9 +++++ .../cfg/intrinsics/slots/slots.factor | 3 ++ .../representations/representations.factor | 39 ++++++++++++++----- .../value-numbering-tests.factor | 10 ++--- basis/compiler/codegen/codegen.factor | 4 -- basis/compiler/tests/low-level-ir.factor | 14 ------- basis/cpu/architecture/architecture.factor | 6 --- basis/cpu/ppc/ppc.factor | 16 +++----- basis/cpu/x86/x86.factor | 17 -------- .../math/vectors/simd/functor/functor.factor | 16 +++----- basis/random/random-tests.factor | 3 ++ basis/random/random.factor | 2 +- basis/unix/statfs/openbsd/openbsd.factor | 5 ++- 19 files changed, 86 insertions(+), 128 deletions(-) diff --git a/basis/checksums/md5/md5-tests.factor b/basis/checksums/md5/md5-tests.factor index 730c0b8516..45dc253c86 100644 --- a/basis/checksums/md5/md5-tests.factor +++ b/basis/checksums/md5/md5-tests.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2009 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: byte-arrays checksums checksums.md5 io.encodings.binary -io.streams.byte-array kernel math namespaces tools.test ; +io.streams.byte-array kernel math namespaces tools.test +sequences ; IN: checksums.md5.tests [ "d41d8cd98f00b204e9800998ecf8427e" ] [ "" >byte-array md5 checksum-bytes hex-string ] unit-test @@ -33,3 +34,9 @@ IN: checksums.md5.tests "asdf" binary add-checksum-stream [ get-checksum ] [ get-checksum ] bi = ] unit-test + +[ + t +] [ + { "abcd" "efg" } md5 checksum-lines length 16 = +] unit-test diff --git a/basis/compiler/cfg/builder/builder-tests.factor b/basis/compiler/cfg/builder/builder-tests.factor index 9a77ee4017..d303cc597f 100644 --- a/basis/compiler/cfg/builder/builder-tests.factor +++ b/basis/compiler/cfg/builder/builder-tests.factor @@ -159,9 +159,12 @@ IN: compiler.cfg.builder.tests { pinned-c-ptr class fixnum } \ set-alien-cell '[ _ declare _ execute ] unit-test-cfg ] each -: contains-insn? ( quot insn-check -- ? ) +: count-insns ( quot insn-check -- ? ) [ test-mr [ instructions>> ] map ] dip - '[ _ any? ] any? ; inline + '[ _ count ] sigma ; inline + +: contains-insn? ( quot insn-check -- ? ) + count-insns 0 > ; inline [ t ] [ [ swap ] [ ##replace? ] contains-insn? ] unit-test @@ -197,14 +200,16 @@ IN: compiler.cfg.builder.tests [ f t ] [ [ { byte-array fixnum } declare alien-cell 4 alien-float ] [ [ ##box-alien? ] contains-insn? ] - [ [ ##box-float? ] contains-insn? ] bi + [ [ ##allot? ] contains-insn? ] bi ] unit-test [ f t ] [ [ { byte-array fixnum } declare alien-cell { simple-alien } declare 4 alien-float ] [ [ ##box-alien? ] contains-insn? ] - [ [ ##box-float? ] contains-insn? ] bi + [ [ ##allot? ] contains-insn? ] bi ] unit-test + + [ 1 ] [ [ dup float+ ] [ ##alien-double? ] count-insns ] unit-test ] when ! Regression. Make sure everything is inlined correctly diff --git a/basis/compiler/cfg/gc-checks/gc-checks-tests.factor b/basis/compiler/cfg/gc-checks/gc-checks-tests.factor index 5580de9a47..27d37b115f 100644 --- a/basis/compiler/cfg/gc-checks/gc-checks-tests.factor +++ b/basis/compiler/cfg/gc-checks/gc-checks-tests.factor @@ -16,7 +16,7 @@ V{ } 0 test-bb V{ - T{ ##box-float f 0 1 } + T{ ##box-alien f 0 1 } } 1 test-bb 0 1 edge diff --git a/basis/compiler/cfg/hats/hats.factor b/basis/compiler/cfg/hats/hats.factor index cf5c0095ca..42aa5512bc 100644 --- a/basis/compiler/cfg/hats/hats.factor +++ b/basis/compiler/cfg/hats/hats.factor @@ -49,24 +49,9 @@ insn-classes get [ [ ##load-reference ] } cond ; -: ^^unbox-c-ptr ( src class -- dst ) - [ next-vreg dup ] 2dip next-vreg ##unbox-c-ptr ; - -: ^^allot-tuple ( n -- dst ) - 2 + cells tuple ^^allot ; - -: ^^allot-array ( n -- dst ) - 2 + cells array ^^allot ; - -: ^^allot-byte-array ( n -- dst ) - 2 cells + byte-array ^^allot ; - : ^^offset>slot ( slot -- vreg' ) cell 4 = [ 1 ^^shr-imm ] [ any-rep ^^copy ] if ; -: ^^tag-offset>slot ( slot tag -- vreg' ) - [ ^^offset>slot ] dip ^^sub-imm ; - : ^^tag-fixnum ( src -- dst ) tag-bits get ^^shl-imm ; diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index d40aabcb19..68cd173683 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -199,15 +199,6 @@ def: dst/int-rep use: src/int-rep ; ! Float arithmetic -PURE-INSN: ##unbox-float -def: dst/double-rep -use: src/int-rep ; - -PURE-INSN: ##box-float -def: dst/int-rep -use: src/double-rep -temp: temp/int-rep ; - PURE-INSN: ##add-float def: dst/double-rep use: src1/double-rep src2/double-rep ; @@ -266,18 +257,6 @@ def: dst/double-rep use: src/int-rep ; ! SIMD operations - -PURE-INSN: ##box-vector -def: dst/int-rep -use: src -literal: rep -temp: temp/int-rep ; - -PURE-INSN: ##unbox-vector -def: dst -use: src/int-rep -literal: rep ; - PURE-INSN: ##zero-vector def: dst literal: rep ; @@ -760,8 +739,6 @@ literal: n ; UNION: ##allocation ##allot -##box-float -##box-vector ##box-alien ##box-displaced-alien ; diff --git a/basis/compiler/cfg/intrinsics/alien/alien.factor b/basis/compiler/cfg/intrinsics/alien/alien.factor index bc6baa21b7..a37e100c3e 100644 --- a/basis/compiler/cfg/intrinsics/alien/alien.factor +++ b/basis/compiler/cfg/intrinsics/alien/alien.factor @@ -3,8 +3,9 @@ USING: accessors kernel sequences alien math classes.algebra fry locals combinators combinators.short-circuit cpu.architecture compiler.tree.propagation.info compiler.cfg.hats -compiler.cfg.stacks compiler.cfg.instructions -compiler.cfg.utilities compiler.cfg.builder.blocks ; +compiler.cfg.registers compiler.cfg.stacks +compiler.cfg.instructions compiler.cfg.utilities +compiler.cfg.builder.blocks ; IN: compiler.cfg.intrinsics.alien : emit-? ( node -- ? ) @@ -33,6 +34,9 @@ IN: compiler.cfg.intrinsics.alien [ second class>> fixnum class<= ] bi and ; +: ^^unbox-c-ptr ( src class -- dst ) + [ next-vreg dup ] 2dip next-vreg ##unbox-c-ptr ; + : prepare-alien-accessor ( info -- ptr-vreg offset ) class>> [ 2inputs ^^untag-fixnum swap ] dip ^^unbox-c-ptr ^^add 0 ; diff --git a/basis/compiler/cfg/intrinsics/allot/allot.factor b/basis/compiler/cfg/intrinsics/allot/allot.factor index d4aa2750c0..6ad5450bfc 100644 --- a/basis/compiler/cfg/intrinsics/allot/allot.factor +++ b/basis/compiler/cfg/intrinsics/allot/allot.factor @@ -18,6 +18,9 @@ IN: compiler.cfg.intrinsics.allot : tuple-slot-regs ( layout -- vregs ) [ second ds-load ] [ ^^load-literal ] bi prefix ; +: ^^allot-tuple ( n -- dst ) + 2 + cells tuple ^^allot ; + : emit- ( node -- ) dup node-input-infos last literal>> dup array? [ @@ -36,6 +39,9 @@ IN: compiler.cfg.intrinsics.allot : expand-? ( obj -- ? ) dup integer? [ 0 8 between? ] [ drop f ] if ; +: ^^allot-array ( n -- dst ) + 2 + cells array ^^allot ; + :: emit- ( node -- ) [let | len [ node node-input-infos first literal>> ] | len expand-? [ @@ -54,6 +60,9 @@ IN: compiler.cfg.intrinsics.allot : bytes>cells ( m -- n ) cell align cell /i ; +: ^^allot-byte-array ( n -- dst ) + 2 cells + byte-array ^^allot ; + : emit-allot-byte-array ( len -- dst ) ds-drop dup ^^allot-byte-array diff --git a/basis/compiler/cfg/intrinsics/slots/slots.factor b/basis/compiler/cfg/intrinsics/slots/slots.factor index 07202ae60b..8ee1c41cfb 100644 --- a/basis/compiler/cfg/intrinsics/slots/slots.factor +++ b/basis/compiler/cfg/intrinsics/slots/slots.factor @@ -8,6 +8,9 @@ IN: compiler.cfg.intrinsics.slots : value-tag ( info -- n ) class>> class-tag ; inline +: ^^tag-offset>slot ( slot tag -- vreg' ) + [ ^^offset>slot ] dip ^^sub-imm ; + : (emit-slot) ( infos -- dst ) [ 2inputs ] [ first value-tag ] bi* ^^tag-offset>slot ^^slot ; diff --git a/basis/compiler/cfg/representations/representations.factor b/basis/compiler/cfg/representations/representations.factor index 423f415742..a2311ca964 100644 --- a/basis/compiler/cfg/representations/representations.factor +++ b/basis/compiler/cfg/representations/representations.factor @@ -1,8 +1,10 @@ ! Copyright (C) 2009 Slava Pestov ! See http://factorcode.org/license.txt for BSD license. USING: kernel fry accessors sequences assocs sets namespaces -arrays combinators combinators.short-circuit make locals deques -dlists layouts cpu.architecture compiler.utilities +arrays combinators combinators.short-circuit math make locals +deques dlists layouts byte-arrays cpu.architecture +compiler.utilities +compiler.constants compiler.cfg compiler.cfg.rpo compiler.cfg.hats @@ -25,24 +27,31 @@ GENERIC: emit-unbox ( dst src rep -- ) M:: float-rep emit-box ( dst src rep -- ) double-rep next-vreg-rep :> temp temp src ##single>double-float - dst temp int-rep next-vreg-rep ##box-float ; + dst temp double-rep emit-box ; M:: float-rep emit-unbox ( dst src rep -- ) double-rep next-vreg-rep :> temp - temp src ##unbox-float + temp src double-rep emit-unbox dst temp ##double>single-float ; M: double-rep emit-box - drop int-rep next-vreg-rep ##box-float ; + drop + [ drop 16 float int-rep next-vreg-rep ##allot ] + [ float-offset swap ##set-alien-double ] + 2bi ; M: double-rep emit-unbox - drop ##unbox-float ; + drop float-offset ##alien-double ; -M: vector-rep emit-box - int-rep next-vreg-rep ##box-vector ; +M:: vector-rep emit-box ( dst src rep -- ) + int-rep next-vreg-rep :> temp + dst 16 2 cells + byte-array int-rep next-vreg-rep ##allot + temp 16 tag-fixnum ##load-immediate + temp dst 1 byte-array tag-number ##set-slot-imm + dst byte-array-offset src rep ##set-alien-vector ; M: vector-rep emit-unbox - ##unbox-vector ; + [ byte-array-offset ] dip ##alien-vector ; M:: scalar-rep emit-box ( dst src rep -- ) int-rep next-vreg-rep :> temp @@ -143,6 +152,9 @@ SYMBOL: costs ! Insert conversions. This introduces new temporaries, so we need ! to rename opearands too. +! Mapping from vreg,rep pairs to vregs +SYMBOL: alternatives + :: emit-def-conversion ( dst preferred required -- new-dst' ) ! If an instruction defines a register with representation 'required', ! but the register has preferred representation 'preferred', then @@ -155,7 +167,13 @@ SYMBOL: costs ! but the register has preferred representation 'preferred', then ! we rename the instruction's input to a new register, which ! becomes the output of a conversion instruction. - required next-vreg-rep [ src required preferred emit-conversion ] keep ; + preferred required eq? [ src ] [ + src required alternatives get [ + required next-vreg-rep :> new-src + [ new-src ] 2dip preferred emit-conversion + new-src + ] 2cache + ] if ; SYMBOLS: renaming-set needs-renaming? ; @@ -236,6 +254,7 @@ M: insn conversions-for-insn , ; dup kill-block? [ drop ] [ [ [ + H{ } clone alternatives set [ conversions-for-insn ] each ] V{ } make ] change-instructions drop diff --git a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor index ba33885aac..00e2d33fb4 100644 --- a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor +++ b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor @@ -109,19 +109,15 @@ IN: compiler.cfg.value-numbering.tests { T{ ##peek f 8 D 0 } T{ ##peek f 9 D -1 } - T{ ##unbox-float f 10 8 } - T{ ##unbox-float f 11 9 } - T{ ##compare-float-unordered f 12 10 11 cc< } - T{ ##compare-float-unordered f 14 10 11 cc/< } + T{ ##compare-float-unordered f 12 8 9 cc< } + T{ ##compare-float-unordered f 14 8 9 cc/< } T{ ##replace f 14 D 0 } } ] [ { T{ ##peek f 8 D 0 } T{ ##peek f 9 D -1 } - T{ ##unbox-float f 10 8 } - T{ ##unbox-float f 11 9 } - T{ ##compare-float-unordered f 12 10 11 cc< } + T{ ##compare-float-unordered f 12 8 9 cc< } T{ ##compare-imm f 14 12 5 cc= } T{ ##replace f 14 D 0 } } value-numbering-step trim-temps diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 839e1aef05..6352401bfd 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -146,8 +146,6 @@ CODEGEN: ##not %not CODEGEN: ##neg %neg CODEGEN: ##log2 %log2 CODEGEN: ##copy %copy -CODEGEN: ##unbox-float %unbox-float -CODEGEN: ##box-float %box-float CODEGEN: ##add-float %add-float CODEGEN: ##sub-float %sub-float CODEGEN: ##mul-float %mul-float @@ -161,14 +159,12 @@ CODEGEN: ##single>double-float %single>double-float CODEGEN: ##double>single-float %double>single-float CODEGEN: ##integer>float %integer>float CODEGEN: ##float>integer %float>integer -CODEGEN: ##unbox-vector %unbox-vector CODEGEN: ##zero-vector %zero-vector CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##shuffle-vector %shuffle-vector CODEGEN: ##compare-vector %compare-vector CODEGEN: ##test-vector %test-vector -CODEGEN: ##box-vector %box-vector CODEGEN: ##add-vector %add-vector CODEGEN: ##saturated-add-vector %saturated-add-vector CODEGEN: ##add-sub-vector %add-sub-vector diff --git a/basis/compiler/tests/low-level-ir.factor b/basis/compiler/tests/low-level-ir.factor index 0b2da64636..e508b55b8d 100644 --- a/basis/compiler/tests/low-level-ir.factor +++ b/basis/compiler/tests/low-level-ir.factor @@ -46,20 +46,6 @@ IN: compiler.tests.low-level-ir } compile-test-bb ] unit-test -! ##copy on floats. We can only run this test if float intrinsics -! are enabled. -\ float+ "intrinsic" word-prop [ - [ 1.5 ] [ - V{ - T{ ##load-reference f 4 1.5 } - T{ ##unbox-float f 1 4 } - T{ ##copy f 2 1 double-rep } - T{ ##box-float f 3 2 } - T{ ##copy f 0 3 int-rep } - } compile-test-bb - ] unit-test -] when - ! make sure slot access works when the destination is ! one of the sources [ t ] [ diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 41b3fed08d..eecfd13e66 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -197,9 +197,6 @@ HOOK: %fixnum-add cpu ( label dst src1 src2 -- ) HOOK: %fixnum-sub cpu ( label dst src1 src2 -- ) HOOK: %fixnum-mul cpu ( label dst src1 src2 -- ) -HOOK: %unbox-float cpu ( dst src -- ) -HOOK: %box-float cpu ( dst src temp -- ) - HOOK: %add-float cpu ( dst src1 src2 -- ) HOOK: %sub-float cpu ( dst src1 src2 -- ) HOOK: %mul-float cpu ( dst src1 src2 -- ) @@ -216,9 +213,6 @@ HOOK: %double>single-float cpu ( dst src -- ) HOOK: %integer>float cpu ( dst src -- ) HOOK: %float>integer cpu ( dst src -- ) -HOOK: %box-vector cpu ( dst src temp rep -- ) -HOOK: %unbox-vector cpu ( dst src rep -- ) - HOOK: %zero-vector cpu ( dst rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index cfad8259d8..7336d22544 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -230,12 +230,6 @@ M: ppc %copy ( dst src rep -- ) } case ] if ; -M: ppc %unbox-float ( dst src -- ) float-offset LFD ; - -M:: ppc %box-float ( dst src temp -- ) - dst 16 float temp %allot - src dst float-offset STFD ; - GENERIC: float-function-param* ( dst src -- ) M: spill-slot float-function-param* [ 1 ] dip n>> spill@ LFD ; @@ -399,13 +393,13 @@ M: ppc %alien-cell LWZ ; M: ppc %alien-float LFS ; M: ppc %alien-double LFD ; -M: ppc %set-alien-integer-1 swapd STB ; -M: ppc %set-alien-integer-2 swapd STH ; +M: ppc %set-alien-integer-1 -rot STB ; +M: ppc %set-alien-integer-2 -rot STH ; -M: ppc %set-alien-cell swapd STW ; +M: ppc %set-alien-cell -rot STW ; -M: ppc %set-alien-float swapd STFS ; -M: ppc %set-alien-double swapd STFD ; +M: ppc %set-alien-float -rot STFS ; +M: ppc %set-alien-double -rot STFD ; : load-zone-ptr ( reg -- ) "nursery" %load-vm-field-addr ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index cca8e617ca..48c97e9322 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -474,13 +474,6 @@ M: x86 %double>single-float CVTSD2SS ; M: x86 %integer>float CVTSI2SD ; M: x86 %float>integer CVTTSD2SI ; -M: x86 %unbox-float ( dst src -- ) - float-offset [+] MOVSD ; - -M:: x86 %box-float ( dst src temp -- ) - dst 16 float temp %allot - dst float-offset [+] src MOVSD ; - : %cmov-float= ( dst src -- ) [ "no-move" define-label @@ -561,16 +554,6 @@ M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- ) M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- ) \ UCOMISD (%compare-float-branch) ; -M:: x86 %box-vector ( dst src rep temp -- ) - dst rep rep-size 2 cells + byte-array temp %allot - 16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm - dst byte-array-offset [+] - src rep %copy ; - -M:: x86 %unbox-vector ( dst src rep -- ) - dst src byte-array-offset [+] - rep %copy ; - MACRO: available-reps ( alist -- ) ! Each SSE version adds new representations and supports ! all old ones diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 6439069fc7..fa1f208188 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -8,6 +8,7 @@ sequences.private strings words definitions macros cpu.architecture namespaces arrays quotations combinators combinators.short-circuit sets layouts ; QUALIFIED-WITH: alien.c-types c +QUALIFIED: math.private IN: math.vectors.simd.functor ERROR: bad-length got expected ; @@ -36,8 +37,8 @@ MACRO: simd-boa ( rep class -- simd-array ) : can-be-unboxed? ( type -- ? ) { - { c:float [ t ] } - { c:double [ t ] } + { c:float [ \ math.private:float+ "intrinsic" word-prop ] } + { c:double [ \ math.private:float+ "intrinsic" word-prop ] } [ c:heap-size cell < ] } case ; @@ -57,7 +58,7 @@ MACRO: simd-boa ( rep class -- simd-array ) : simd-with ( rep class x -- simd-array ) [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline -: simd-with-fast? ( rep -- ? ) +: simd-with/nth-fast? ( rep -- ? ) [ \ (simd-vshuffle) supported-simd-op? ] [ rep-component-type can-be-unboxed? ] bi and ; @@ -65,16 +66,11 @@ MACRO: simd-boa ( rep class -- simd-array ) :: define-with-custom-inlining ( word rep class -- ) word [ drop - rep simd-with-fast? [ + rep simd-with/nth-fast? [ [ rep rep-coerce rep (simd-with) class boa ] ] [ word def>> ] if ] "custom-inlining" set-word-prop ; -: simd-nth-fast? ( rep -- ? ) - [ \ (simd-vshuffle) supported-simd-op? ] - [ rep-component-type can-be-unboxed? ] - bi and ; - : simd-nth-fast ( rep -- quot ) [ rep-components ] keep '[ swap _ '[ _ _ (simd-select) ] 2array ] map-index @@ -84,7 +80,7 @@ MACRO: simd-boa ( rep class -- simd-array ) rep-component-type dup c:c-type-getter-boxer c:array-accessor ; MACRO: simd-nth ( rep -- x ) - dup simd-nth-fast? [ simd-nth-fast ] [ simd-nth-slow ] if ; + dup simd-with/nth-fast? [ simd-nth-fast ] [ simd-nth-slow ] if ; : boa-effect ( rep n -- effect ) [ rep-components ] dip * diff --git a/basis/random/random-tests.factor b/basis/random/random-tests.factor index da8d4a1844..96dc8cc783 100644 --- a/basis/random/random-tests.factor +++ b/basis/random/random-tests.factor @@ -30,3 +30,6 @@ IN: random.tests [ 3 ] [ { 1 2 3 4 } 3 sample prune length ] unit-test [ 99 ] [ 100 99 sample prune length ] unit-test + +[ ] +[ [ 100 random-bytes ] with-system-random drop ] unit-test diff --git a/basis/random/random.factor b/basis/random/random.factor index 1f2408556f..197c232404 100755 --- a/basis/random/random.factor +++ b/basis/random/random.factor @@ -22,7 +22,7 @@ M: object random-bytes* ( n tuple -- byte-array ) [ 2drop ] [ random-32* 4 >le swap head over push-all ] if ] bi-curry bi* ; -M: object random-32* ( tuple -- r ) 4 random-bytes* le> ; +M: object random-32* ( tuple -- r ) 4 swap random-bytes* le> ; ERROR: no-random-number-generator ; diff --git a/basis/unix/statfs/openbsd/openbsd.factor b/basis/unix/statfs/openbsd/openbsd.factor index cd720d74d4..4e65e74c2c 100644 --- a/basis/unix/statfs/openbsd/openbsd.factor +++ b/basis/unix/statfs/openbsd/openbsd.factor @@ -1,6 +1,7 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.c-types alien.syntax unix.types unix.stat classes.struct ; +USING: alien.c-types alien.syntax unix.types classes.struct +unix.stat ; IN: unix.statfs.openbsd CONSTANT: MFSNAMELEN 16 @@ -30,4 +31,4 @@ STRUCT: statfs { f_mntfromname { char MNAMELEN } } { mount_info char[160] } ; -FUNCTION: int statfs ( char* path, statvfs* buf ) ; +FUNCTION: int statfs ( char* path, statfs* buf ) ; From 2a4475f85ebaf3a65aa24929a3e19f86b2d5c418 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 1 Oct 2009 23:20:05 -0500 Subject: [PATCH 15/19] Fix alias analysis of a ##compare whose input is another ##compare --- basis/compiler/cfg/alias-analysis/alias-analysis.factor | 1 + 1 file changed, 1 insertion(+) diff --git a/basis/compiler/cfg/alias-analysis/alias-analysis.factor b/basis/compiler/cfg/alias-analysis/alias-analysis.factor index 6e915ebcb4..0c22d24a2c 100644 --- a/basis/compiler/cfg/alias-analysis/alias-analysis.factor +++ b/basis/compiler/cfg/alias-analysis/alias-analysis.factor @@ -282,6 +282,7 @@ M: ##copy analyze-aliases* } 1&& ; inline M: ##compare analyze-aliases* + call-next-method dup useless-compare? [ dst>> \ f tag-number \ ##load-immediate new-insn analyze-aliases* From 01736e9bec6ea0ae616696742b55b256d403557c Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 23:46:37 -0500 Subject: [PATCH 16/19] define simd equal? methods as v= vall? --- basis/math/vectors/simd/functor/functor.factor | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index fa1f208188..878d4aea70 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -186,6 +186,9 @@ M: A clone underlying>> clone \ A boa ; inline M: A length drop N ; inline +M: A equal? + over \ A instance? [ v= vall? ] [ 2drop f ] if ; + M: A nth-unsafe underlying>> A-rep simd-nth ; inline M: A set-nth-unsafe @@ -204,8 +207,6 @@ M: A new-sequence [ N bad-length ] if ; inline -M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ; - M: A c:byte-length underlying>> length ; inline M: A element-type drop A-rep rep-component-type ; @@ -340,6 +341,9 @@ M: A clone M: A length drop N ; inline +M: A equal? + over \ A instance? [ v= vall? ] [ 2drop f ] if ; + : A-deref ( n seq -- n' seq' ) over N/2 < [ underlying1>> ] [ [ N/2 - ] dip underlying2>> ] if \ A/2 boa ; inline @@ -357,8 +361,6 @@ M: A new-sequence [ N bad-length ] if ; inline -M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ; - M: A c:byte-length drop 32 ; inline M: A element-type drop A-rep rep-component-type ; From 8b7a813a08b462bd99530eca2d5cb138e0c04b6d Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 1 Oct 2009 23:49:45 -0500 Subject: [PATCH 17/19] change vector logical words to reuse the input sequence types so that they work as simd fallbacks --- basis/math/vectors/simd/simd-tests.factor | 30 ++++++++++++++++------- basis/math/vectors/vectors.factor | 12 ++++----- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 4ce1befdbc..7f43124d59 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -286,13 +286,13 @@ simd-classes [ "== Checking element tests" print -[ { t f t f f f t f } ] +[ short-8{ t f t f f f t f } ] [ short-8{ 1 2 3 4 5 6 7 8 } short-8{ 1 0 3 -1 -2 -3 7 -4 } v= ] unit-test [ short-8{ t f t f f f t f } ] [ short-8{ 1 2 3 4 5 6 7 8 } short-8{ 1 0 3 -1 -2 -3 7 -4 } [ { short-8 short-8 } declare v= ] compile-call ] unit-test -[ { t f t f f f t f } ] +[ int-8{ t f t f f f t f } ] [ int-8{ 1 2 3 4 5 6 7 8 } int-8{ 1 0 3 -1 -2 -3 7 -4 } v= ] unit-test [ int-8{ t f t f f f t f } ] @@ -301,14 +301,18 @@ simd-classes [ [ int-4{ t f t f } ] [ int-4{ 1 2 3 4 } int-4{ 1 0 3 -1 } [ { int-4 int-4 } declare v= ] compile-call ] unit-test -[ { t f t f } ] +[ int-4{ t f t f } ] [ int-4{ 1 2 3 4 } int-4{ 1 0 3 -1 } v= ] unit-test [ int-4{ t f t f } ] [ int-4{ 1 2 3 4 } int-4{ 1 0 3 -1 } [ { int-4 int-4 } declare v= ] compile-call ] unit-test -[ { t f t f } ] -[ float-4{ 1.0 0/0. 3.0 4.0 } float-4{ 1.0 0/0. 3.0 -1.0 } v= ] unit-test +[ t ] +[ + float-4{ t f t f } + float-4{ 1.0 0/0. 3.0 4.0 } float-4{ 1.0 0/0. 3.0 -1.0 } v= + exact= +] unit-test [ t ] [ @@ -317,8 +321,12 @@ simd-classes [ exact= ] unit-test -[ { t f t f f t t t } ] -[ float-8{ 1.0 0/0. 3.0 4.0 5.0 6.0 7.0 8.0 } float-8{ 1.0 0/0. 3.0 -1.0 -2.0 6.0 7.0 8.0 } v= ] unit-test +[ t ] +[ + float-8{ t f t f f t t t } + float-8{ 1.0 0/0. 3.0 4.0 5.0 6.0 7.0 8.0 } float-8{ 1.0 0/0. 3.0 -1.0 -2.0 6.0 7.0 8.0 } v= + exact= +] unit-test [ t ] [ @@ -327,8 +335,12 @@ simd-classes [ exact= ] unit-test -[ { f t } ] -[ double-2{ 0/0. 3.0 } double-2{ 0/0. 3.0 } v= ] unit-test +[ t ] +[ + double-2{ f t } + double-2{ 0/0. 3.0 } double-2{ 0/0. 3.0 } v= + exact= +] unit-test [ t ] [ diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index c2bfb3f295..302380cd09 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -96,12 +96,12 @@ PRIVATE> : vany? ( v -- ? ) [ ] any? ; : vnone? ( v -- ? ) [ not ] all? ; -: v< ( u v -- w ) [ < ] { } 2map-as ; -: v<= ( u v -- w ) [ <= ] { } 2map-as ; -: v>= ( u v -- w ) [ >= ] { } 2map-as ; -: v> ( u v -- w ) [ > ] { } 2map-as ; -: vunordered? ( u v -- w ) [ unordered? ] { } 2map-as ; -: v= ( u v -- w ) [ = ] { } 2map-as ; +: v< ( u v -- w ) [ < ] 2map ; +: v<= ( u v -- w ) [ <= ] 2map ; +: v>= ( u v -- w ) [ >= ] 2map ; +: v> ( u v -- w ) [ > ] 2map ; +: vunordered? ( u v -- w ) [ unordered? ] 2map ; +: v= ( u v -- w ) [ = ] 2map ; : v? ( mask true false -- w ) [ vbitand ] [ vbitandn ] bi-curry* bi vbitor ; inline From fb09d679366654b6ab5aa7be8d8baf7d5565aa21 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 1 Oct 2009 23:54:19 -0500 Subject: [PATCH 18/19] compiler.cfg.alias-analysis: fix ##compare where operands are copies --- basis/compiler/cfg/alias-analysis/alias-analysis.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basis/compiler/cfg/alias-analysis/alias-analysis.factor b/basis/compiler/cfg/alias-analysis/alias-analysis.factor index 0c22d24a2c..2303b98aed 100644 --- a/basis/compiler/cfg/alias-analysis/alias-analysis.factor +++ b/basis/compiler/cfg/alias-analysis/alias-analysis.factor @@ -278,7 +278,7 @@ M: ##copy analyze-aliases* : useless-compare? ( insn -- ? ) { [ cc>> cc= eq? ] - [ [ src1>> vreg>ac ] [ src2>> vreg>ac ] bi = not ] + [ [ src1>> ] [ src2>> ] bi [ resolve vreg>ac ] bi@ = not ] } 1&& ; inline M: ##compare analyze-aliases* From 218d1cdd315483f3bc85ab55ab8dc82107a0ee33 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Fri, 2 Oct 2009 01:23:17 -0500 Subject: [PATCH 19/19] prettyprint.backend: fix performance problem when printing large arrays with length limit on --- basis/prettyprint/backend/backend.factor | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/basis/prettyprint/backend/backend.factor b/basis/prettyprint/backend/backend.factor index fb47c50fbe..f919573ea9 100644 --- a/basis/prettyprint/backend/backend.factor +++ b/basis/prettyprint/backend/backend.factor @@ -169,11 +169,10 @@ M: tuple pprint* : do-length-limit ( seq -- trimmed n/f ) length-limit get dup [ over length over [-] - dup zero? [ 2drop f ] [ [ head ] dip ] if + dup zero? [ 2drop f ] [ [ head-slice ] dip ] if ] when ; : pprint-elements ( seq -- ) - >array do-length-limit [ [ pprint* ] each ] dip [ "~" swap number>string " more~" 3append text ] when* ;