From e77341b90cb47978046c12bdbba1ead2de20a337 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sun, 20 Sep 2009 02:08:32 -0500 Subject: [PATCH 01/15] math.vectors.simd: redesign to be more flexible, integer SIMD work in progress --- basis/alien/c-types/c-types.factor | 8 + basis/cpu/architecture/architecture.factor | 34 +++- basis/cpu/x86/features/features.factor | 49 ++--- basis/cpu/x86/x86.factor | 115 ++++++++++- basis/math/floats/env/x86/x86.factor | 4 +- .../math/vectors/simd/functor/functor.factor | 134 +++++++++++-- .../simd/intrinsics/intrinsics-tests.factor | 18 ++ .../vectors/simd/intrinsics/intrinsics.factor | 54 +++++- basis/math/vectors/simd/simd-docs.factor | 84 -------- basis/math/vectors/simd/simd.factor | 180 +----------------- basis/math/vectors/simd/summary.txt | 1 + 11 files changed, 363 insertions(+), 318 deletions(-) create mode 100644 basis/math/vectors/simd/intrinsics/intrinsics-tests.factor create mode 100644 basis/math/vectors/simd/summary.txt diff --git a/basis/alien/c-types/c-types.factor b/basis/alien/c-types/c-types.factor index fa27e29c04..afbb664fed 100755 --- a/basis/alien/c-types/c-types.factor +++ b/basis/alien/c-types/c-types.factor @@ -472,3 +472,11 @@ SYMBOLS: \ ulong \ size_t typedef ] with-compilation-unit +M: char-16-rep rep-component-type drop char ; +M: uchar-16-rep rep-component-type drop uchar ; +M: short-8-rep rep-component-type drop short ; +M: ushort-8-rep rep-component-type drop ushort ; +M: int-4-rep rep-component-type drop int ; +M: uint-4-rep rep-component-type drop uint ; +M: float-4-rep rep-component-type drop float ; +M: double-2-rep rep-component-type drop double ; diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index d6611c3384..61e4e2df37 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -22,8 +22,6 @@ SINGLETONS: float-rep double-rep ; ! On x86, floating point registers are really vector registers SINGLETONS: -float-4-rep -double-2-rep char-16-rep uchar-16-rep short-8-rep @@ -31,9 +29,11 @@ ushort-8-rep int-4-rep uint-4-rep ; -UNION: vector-rep +SINGLETONS: float-4-rep -double-2-rep +double-2-rep ; + +UNION: int-vector-rep char-16-rep uchar-16-rep short-8-rep @@ -41,6 +41,14 @@ ushort-8-rep int-4-rep uint-4-rep ; +UNION: float-vector-rep +float-4-rep +double-2-rep ; + +UNION: vector-rep +int-vector-rep +float-vector-rep ; + UNION: representation any-rep tagged-rep @@ -76,10 +84,15 @@ M: double-rep rep-size drop 8 ; M: stack-params rep-size drop cell ; M: vector-rep rep-size drop 16 ; +GENERIC: rep-component-type ( rep -- n ) + +! Methods defined in alien.c-types + GENERIC: scalar-rep-of ( rep -- rep' ) M: float-4-rep scalar-rep-of drop float-rep ; M: double-2-rep scalar-rep-of drop double-rep ; +M: int-vector-rep scalar-rep-of drop int-rep ; ! Mapping from register class to machine registers HOOK: machine-registers cpu ( -- assoc ) @@ -167,7 +180,6 @@ HOOK: %unbox-vector cpu ( dst src rep -- ) HOOK: %broadcast-vector cpu ( dst src rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) - HOOK: %add-vector cpu ( dst src1 src2 rep -- ) HOOK: %sub-vector cpu ( dst src1 src2 rep -- ) HOOK: %mul-vector cpu ( dst src1 src2 rep -- ) @@ -177,6 +189,18 @@ HOOK: %max-vector cpu ( dst src1 src2 rep -- ) HOOK: %sqrt-vector cpu ( dst src rep -- ) HOOK: %horizontal-add-vector cpu ( dst src rep -- ) +HOOK: %broadcast-vector-reps cpu ( -- reps ) +HOOK: %gather-vector-2-reps cpu ( -- reps ) +HOOK: %gather-vector-4-reps cpu ( -- reps ) +HOOK: %add-vector-reps cpu ( -- reps ) +HOOK: %sub-vector-reps cpu ( -- reps ) +HOOK: %mul-vector-reps cpu ( -- reps ) +HOOK: %div-vector-reps cpu ( -- reps ) +HOOK: %min-vector-reps cpu ( -- reps ) +HOOK: %max-vector-reps cpu ( -- reps ) +HOOK: %sqrt-vector-reps cpu ( -- reps ) +HOOK: %horizontal-add-vector-reps cpu ( -- reps ) + HOOK: %unbox-alien cpu ( dst src -- ) HOOK: %unbox-any-c-ptr cpu ( dst src temp -- ) HOOK: %box-alien cpu ( dst src temp -- ) diff --git a/basis/cpu/x86/features/features.factor b/basis/cpu/x86/features/features.factor index c5cf2d470a..5fad4e802c 100644 --- a/basis/cpu/x86/features/features.factor +++ b/basis/cpu/x86/features/features.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: system kernel math math.order math.parser namespaces -alien.c-types alien.syntax combinators locals init io cpu.x86 +USING: system kernel memoize math math.order math.parser +namespaces alien.c-types alien.syntax combinators locals init io compiler compiler.units accessors ; IN: cpu.x86.features @@ -13,7 +13,16 @@ FUNCTION: longlong read_timestamp_counter ( ) ; PRIVATE> -ALIAS: sse-version sse_version +: sse-version ( -- n ) + sse_version + "sse-version" get string>number [ min ] when* ; foldable + +: sse? ( -- ? ) sse-version 10 >= ; foldable +: sse2? ( -- ? ) sse-version 20 >= ; foldable +: sse3? ( -- ? ) sse-version 30 >= ; foldable +: ssse3? ( -- ? ) sse-version 33 >= ; foldable +: sse4.1? ( -- ? ) sse-version 41 >= ; foldable +: sse4.2? ( -- ? ) sse-version 42 >= ; foldable : sse-string ( version -- string ) { @@ -32,37 +41,3 @@ M: x86 instruction-count read_timestamp_counter ; : count-instructions ( quot -- n ) instruction-count [ call ] dip instruction-count swap - ; inline - -USING: cpu.x86.features cpu.x86.features.private ; - -:: install-sse-check ( version -- ) - [ - sse-version version < [ - "This image was built to use " write - version sse-string write - " but your CPU only supports " write - sse-version sse-string write "." print - "You will need to bootstrap Factor again." print - flush - 1 exit - ] when - ] "cpu.x86" add-init-hook ; - -: enable-sse ( version -- ) - { - { 00 [ ] } - { 10 [ ] } - { 20 [ enable-sse2 ] } - { 30 [ enable-sse3 ] } - { 33 [ enable-sse3 ] } - { 41 [ enable-sse3 ] } - { 42 [ enable-sse3 ] } - } case ; - -[ { sse_version } compile ] with-optimizer - -"Checking for multimedia extensions: " write sse-version -"sse-version" get [ string>number min ] when* -[ sse-string write " detected" print ] -[ install-sse-check ] -[ enable-sse ] tri diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 04b5308836..4d80862ed3 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -4,7 +4,8 @@ USING: accessors assocs alien alien.c-types arrays strings cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands cpu.architecture kernel kernel.private math memory namespaces make sequences words system layouts combinators math.order fry locals -compiler.constants byte-arrays +compiler.constants byte-arrays io macros quotations cpu.x86.features +cpu.x86.features.private compiler compiler.units init compiler.cfg.registers compiler.cfg.instructions compiler.cfg.intrinsics @@ -250,12 +251,26 @@ M:: x86 %unbox-vector ( dst src rep -- ) dst src byte-array-offset [+] rep copy-register ; +MACRO: available-reps ( alist -- ) + ! Each SSE version adds new representations and supports + ! all old ones + unzip { } [ append ] accumulate rest swap suffix + [ [ 1quotation ] map ] bi@ zip + reverse [ { } ] suffix + '[ _ cond ] ; + M: x86 %broadcast-vector ( dst src rep -- ) { { float-4-rep [ [ MOVSS ] [ drop dup 0 SHUFPS ] 2bi ] } { double-2-rep [ [ MOVSD ] [ drop dup UNPCKLPD ] 2bi ] } } case ; +M: x86 %broadcast-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep } } + } available-reps ; + M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- ) rep { { @@ -269,6 +284,11 @@ M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- ) } } case ; +M: x86 %gather-vector-4-reps + { + { sse? { float-4-rep } } + } available-reps ; + M:: x86 %gather-vector-2 ( dst src1 src2 rep -- ) rep { { @@ -280,6 +300,11 @@ M:: x86 %gather-vector-2 ( dst src1 src2 rep -- ) } } case ; +M: x86 %gather-vector-2-reps + { + { sse2? { double-2-rep } } + } available-reps ; + M: x86 %add-vector ( dst src1 src2 rep -- ) { { float-4-rep [ ADDPS ] } @@ -292,6 +317,12 @@ M: x86 %add-vector ( dst src1 src2 rep -- ) { uint-4-rep [ PADDD ] } } case drop ; +M: x86 %add-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + M: x86 %sub-vector ( dst src1 src2 rep -- ) { { float-4-rep [ SUBPS ] } @@ -304,43 +335,92 @@ M: x86 %sub-vector ( dst src1 src2 rep -- ) { uint-4-rep [ PSUBD ] } } case drop ; +M: x86 %sub-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + M: x86 %mul-vector ( dst src1 src2 rep -- ) { { float-4-rep [ MULPS ] } { double-2-rep [ MULPD ] } - { int-4-rep [ PMULLW ] } + { short-8-rep [ PMULLW ] } + { ushort-8-rep [ PMULLW ] } + { int-4-rep [ PMULLD ] } + { uint-4-rep [ PMULLD ] } } case drop ; +M: x86 %mul-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + M: x86 %div-vector ( dst src1 src2 rep -- ) { { float-4-rep [ DIVPS ] } { double-2-rep [ DIVPD ] } } case drop ; +M: x86 %div-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep } } + } available-reps ; + M: x86 %min-vector ( dst src1 src2 rep -- ) { { float-4-rep [ MINPS ] } { double-2-rep [ MINPD ] } + { uchar-16-rep [ PMINUB ] } + { short-8-rep [ PMINSW ] } } case drop ; +M: x86 %min-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep short-8-rep uchar-16-rep } } + } available-reps ; + M: x86 %max-vector ( dst src1 src2 rep -- ) { { float-4-rep [ MAXPS ] } { double-2-rep [ MAXPD ] } + { uchar-16-rep [ PMAXUB ] } + { short-8-rep [ PMAXSW ] } } case drop ; +M: x86 %max-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep short-8-rep uchar-16-rep } } + } available-reps ; + M: x86 %sqrt-vector ( dst src rep -- ) { { float-4-rep [ SQRTPS ] } { double-2-rep [ SQRTPD ] } } case ; +M: x86 %sqrt-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep } } + } available-reps ; + M: x86 %horizontal-add-vector ( dst src rep -- ) { { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] } { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] } } case ; +M: x86 %horizontal-add-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep short-8-rep uchar-16-rep } } + } available-reps ; + M: x86 %unbox-alien ( dst src -- ) alien-offset [+] MOV ; @@ -775,3 +855,34 @@ M: x86 small-enough? ( n -- ? ) enable-sse3-simd ; enable-min/max + +:: install-sse-check ( version -- ) + [ + sse-version version < [ + "This image was built to use " write + version sse-string write + " but your CPU only supports " write + sse-version sse-string write "." print + "You will need to bootstrap Factor again." print + flush + 1 exit + ] when + ] "cpu.x86" add-init-hook ; + +: enable-sse ( version -- ) + { + { 00 [ ] } + { 10 [ ] } + { 20 [ enable-sse2 ] } + { 30 [ enable-sse3 ] } + { 33 [ enable-sse3 ] } + { 41 [ enable-sse3 ] } + { 42 [ enable-sse3 ] } + } case ; + +[ { sse_version } compile ] with-optimizer + +"Checking for multimedia extensions: " write sse-version 30 min +[ sse-string write " detected" print ] +[ install-sse-check ] +[ enable-sse ] tri diff --git a/basis/math/floats/env/x86/x86.factor b/basis/math/floats/env/x86/x86.factor index e91fc4eda9..e9120567aa 100644 --- a/basis/math/floats/env/x86/x86.factor +++ b/basis/math/floats/env/x86/x86.factor @@ -31,9 +31,7 @@ M: x87-env (set-fp-env-register) set_x87_env ; M: x86 (fp-env-registers) - sse-version 20 >= - [ 2array ] - [ 1array ] if ; + sse2? [ 2array ] [ 1array ] if ; CONSTANT: sse-exception-flag-bits HEX: 3f CONSTANT: sse-exception-flag>bit diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 641585a5d7..a97dc192be 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -1,24 +1,94 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien.c-types byte-arrays classes functors -kernel math parser prettyprint.custom sequences -sequences.private literals ; +USING: accessors alien.c-types assocs byte-arrays classes +effects fry functors generalizations kernel literals locals +math math.functions math.vectors math.vectors.simd.intrinsics +math.vectors.specialization parser prettyprint.custom sequences +sequences.private strings words definitions macros cpu.architecture ; IN: math.vectors.simd.functor ERROR: bad-length got expected ; +MACRO: simd-boa ( rep class -- simd-array ) + [ rep-components ] [ new ] bi* '[ _ _ nsequence ] ; + +:: define-boa-custom-inlining ( word rep class -- ) + word [ + drop + rep rep rep-gather-word supported-simd-op? [ + [ rep (simd-boa) class boa ] + ] [ word def>> ] if + ] "custom-inlining" set-word-prop ; + +: simd-with ( rep class x -- simd-array ) + [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline + +:: define-with-custom-inlining ( word rep class -- ) + word [ + drop + rep \ (simd-broadcast) supported-simd-op? [ + [ rep rep-coerce rep (simd-broadcast) class boa ] + ] [ word def>> ] if + ] "custom-inlining" set-word-prop ; + +: boa-effect ( rep n -- effect ) + [ rep-components ] dip * + [ CHAR: a + 1string ] map + { "simd-vector" } ; + +: supported-simd-ops ( assoc rep -- assoc' ) + [ + { + { v+ (simd-v+) } + { v- (simd-v-) } + { v* (simd-v*) } + { v/ (simd-v/) } + { vmin (simd-vmin) } + { vmax (simd-vmax) } + { sum (simd-sum) } + } + ] dip + '[ nip _ swap supported-simd-op? ] assoc-filter + '[ drop _ key? ] assoc-filter ; + +:: high-level-ops ( ctor -- assoc ) + ! Some SIMD operations are defined in terms of others. + { + { vneg [ [ dup v- ] keep v- ] } + { v. [ v* sum ] } + { n+v [ [ ctor execute ] dip v+ ] } + { v+n [ ctor execute v+ ] } + { n-v [ [ ctor execute ] dip v- ] } + { v-n [ ctor execute v- ] } + { n*v [ [ ctor execute ] dip v* ] } + { v*n [ ctor execute v* ] } + { n/v [ [ ctor execute ] dip v/ ] } + { v/n [ ctor execute v/ ] } + { norm-sq [ dup v. assert-positive ] } + { norm [ norm-sq sqrt ] } + { normalize [ dup norm v/n ] } + { distance [ v- norm ] } + } ; + +:: simd-vector-words ( class ctor rep assoc -- ) + class + rep rep-component-type c-type-boxed-class + assoc rep supported-simd-ops + ctor high-level-ops assoc-union + specialize-vector-words ; + FUNCTOR: define-simd-128 ( T -- ) -T-TYPE IS ${T} - -N [ 16 T-TYPE heap-size /i ] +N [ 16 T heap-size /i ] A DEFINES-CLASS ${T}-${N} +A-boa DEFINES ${A}-boa +A-with DEFINES ${A}-with >A DEFINES >${A} A{ DEFINES ${A}{ -NTH [ T-TYPE dup c-type-getter-boxer array-accessor ] -SET-NTH [ T-TYPE dup c-setter array-accessor ] +NTH [ T dup c-type-getter-boxer array-accessor ] +SET-NTH [ T dup c-setter array-accessor ] A-rep IS ${A}-rep A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op @@ -59,6 +129,16 @@ M: A pprint* pprint-object ; SYNTAX: A{ \ } [ >A ] parse-literal ; +: A-with ( x -- simd-array ) [ A-rep A ] dip simd-with ; + +\ A-with \ A-rep \ A define-with-custom-inlining + +\ A-boa [ \ A-rep \ A simd-boa ] \ A-rep 1 boa-effect define-declared + +\ A-rep rep-gather-word [ + \ A-boa \ A-rep \ A define-boa-custom-inlining +] when + INSTANCE: A sequence n-op ( v quot -- n ) [ underlying>> A-rep ] dip call ; inline +\ A \ A-with \ A-rep H{ + { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } + { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } + { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } + { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } + { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } + { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] } + { sum [ [ (simd-sum) ] \ A-v->n-op execute ] } +} simd-vector-words + PRIVATE> ;FUNCTOR @@ -76,14 +166,16 @@ PRIVATE> ! Synthesize 256-bit vectors from a pair of 128-bit vectors FUNCTOR: define-simd-256 ( T -- ) -T-TYPE IS ${T} - -N [ 32 T-TYPE heap-size /i ] +N [ 32 T heap-size /i ] N/2 [ N 2 / ] A/2 IS ${T}-${N/2} +A/2-boa IS ${A/2}-boa +A/2-with IS ${A/2}-with A DEFINES-CLASS ${T}-${N} +A-boa DEFINES ${A}-boa +A-with DEFINES ${A}-with >A DEFINES >${A} A{ DEFINES ${A}{ @@ -137,6 +229,16 @@ M: A >pprint-sequence ; M: A pprint* pprint-object ; +: A-with ( x -- simd-array ) + [ A/2-with ] [ A/2-with ] bi [ underlying>> ] bi@ + \ A boa ; inline + +: A-boa ( ... -- simd-array ) + [ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@ + \ A boa ; + +\ A-rep 2 boa-effect \ A-boa set-stack-effect + INSTANCE: A sequence : A-vv->v-op ( v1 v2 quot -- v3 ) @@ -148,4 +250,14 @@ INSTANCE: A sequence [ [ [ underlying1>> ] [ underlying2>> ] bi A-rep ] dip call A-rep ] dip call ; inline +\ A \ A-with \ A-rep H{ + { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } + { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } + { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } + { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } + { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } + { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] } + { sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] } +} simd-vector-words + ;FUNCTOR diff --git a/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor b/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor new file mode 100644 index 0000000000..84eee935a0 --- /dev/null +++ b/basis/math/vectors/simd/intrinsics/intrinsics-tests.factor @@ -0,0 +1,18 @@ +IN: math.vectors.simd.intrinsics.tests +USING: math.vectors.simd.intrinsics cpu.architecture tools.test ; + +[ 16 ] [ uchar-16-rep rep-components ] unit-test +[ 16 ] [ char-16-rep rep-components ] unit-test +[ 8 ] [ ushort-8-rep rep-components ] unit-test +[ 8 ] [ short-8-rep rep-components ] unit-test +[ 4 ] [ uint-4-rep rep-components ] unit-test +[ 4 ] [ int-4-rep rep-components ] unit-test +[ 4 ] [ float-4-rep rep-components ] unit-test +[ 2 ] [ double-2-rep rep-components ] unit-test + +{ 4 1 } [ uint-4-rep (simd-boa) ] must-infer-as +{ 4 1 } [ int-4-rep (simd-boa) ] must-infer-as +{ 4 1 } [ float-4-rep (simd-boa) ] must-infer-as +{ 2 1 } [ double-2-rep (simd-boa) ] must-infer-as + + diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 914d1ef169..a7d019af81 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -1,6 +1,8 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel alien alien.data cpu.architecture libc ; +USING: alien alien.c-types alien.data assocs combinators +cpu.architecture fry generalizations kernel libc macros math +sequences ; IN: math.vectors.simd.intrinsics ERROR: bad-simd-call ; @@ -26,3 +28,53 @@ ERROR: bad-simd-call ; ! Inefficient version for when intrinsics are missing [ swap swap ] dip rep-size memcpy ; +<< + +: rep-components ( rep -- n ) + 16 swap rep-component-type heap-size /i ; foldable + +: rep-coercer ( rep -- quot ) + { + { [ dup int-vector-rep? ] [ [ >fixnum ] ] } + { [ dup float-vector-rep? ] [ [ >float ] ] } + } cond nip ; foldable + +: rep-coerce ( value rep -- value' ) + rep-coercer call( value -- value' ) ; inline + +CONSTANT: rep-gather-words + { + { 2 (simd-gather-2) } + { 4 (simd-gather-4) } + } + +: rep-gather-word ( rep -- word ) + rep-components rep-gather-words at ; + +>> + +MACRO: (simd-boa) ( rep -- quot ) + { + [ rep-coercer ] + [ rep-components ] + [ ] + [ rep-gather-word ] + } cleave + '[ _ _ napply _ _ execute ] ; + +GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) + +M: vector-rep supported-simd-op? + { + { \ (simd-v+) [ %add-vector-reps ] } + { \ (simd-v-) [ %sub-vector-reps ] } + { \ (simd-v*) [ %mul-vector-reps ] } + { \ (simd-v/) [ %div-vector-reps ] } + { \ (simd-vmin) [ %min-vector-reps ] } + { \ (simd-vmax) [ %max-vector-reps ] } + { \ (simd-vsqrt) [ %sqrt-vector-reps ] } + { \ (simd-sum) [ %horizontal-add-vector-reps ] } + { \ (simd-broadcast) [ %broadcast-vector-reps ] } + { \ (simd-gather-2) [ %gather-vector-2-reps ] } + { \ (simd-gather-4) [ %gather-vector-4-reps ] } + } case member? ; diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index b110de1de8..d6131b3a71 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -43,22 +43,6 @@ $nl } "The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers." $nl -"Operations on " { $link float-4 } " instances:" -{ $subsection float-4-with } -{ $subsection float-4-boa } -{ $subsection POSTPONE: float-4{ } -"Operations on " { $link double-2 } " instances:" -{ $subsection double-2-with } -{ $subsection double-2-boa } -{ $subsection POSTPONE: double-2{ } -"Operations on " { $link float-8 } " instances:" -{ $subsection float-8-with } -{ $subsection float-8-boa } -{ $subsection POSTPONE: float-8{ } -"Operations on " { $link double-4 } " instances:" -{ $subsection double-4-with } -{ $subsection double-4-boa } -{ $subsection POSTPONE: double-4{ } "To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words." { $see-also "c-types-specs" } ; @@ -184,72 +168,4 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)" { $subsection "math.vectors.simd.alien" } { $subsection "math.vectors.simd.intrinsics" } ; -! ! ! float-4 - -HELP: float-4 -{ $class-description "A sequence of four single-precision floating point values. New instances can be created with " { $link float-4-with } " or " { $link float-4-boa } "." } ; - -HELP: float-4-with -{ $values { "x" float } { "simd-array" float-4 } } -{ $description "Creates a new vector with all four components equal to a scalar." } ; - -HELP: float-4-boa -{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" float-4 } } -{ $description "Creates a new vector from four scalar components." } ; - -HELP: float-4{ -{ $syntax "float-4{ a b c d }" } -{ $description "Literal syntax for a " { $link float-4 } "." } ; - -! ! ! double-2 - -HELP: double-2 -{ $class-description "A sequence of two double-precision floating point values. New instances can be created with " { $link double-2-with } " or " { $link double-2-boa } "." } ; - -HELP: double-2-with -{ $values { "x" float } { "simd-array" double-2 } } -{ $description "Creates a new vector with both components equal to a scalar." } ; - -HELP: double-2-boa -{ $values { "a" float } { "b" float } { "simd-array" double-2 } } -{ $description "Creates a new vector from two scalar components." } ; - -HELP: double-2{ -{ $syntax "double-2{ a b }" } -{ $description "Literal syntax for a " { $link double-2 } "." } ; - -! ! ! float-8 - -HELP: float-8 -{ $class-description "A sequence of eight single-precision floating point values. New instances can be created with " { $link float-8-with } " or " { $link float-8-boa } "." } ; - -HELP: float-8-with -{ $values { "x" float } { "simd-array" float-8 } } -{ $description "Creates a new vector with all eight components equal to a scalar." } ; - -HELP: float-8-boa -{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "e" float } { "f" float } { "g" float } { "h" float } { "simd-array" float-8 } } -{ $description "Creates a new vector from eight scalar components." } ; - -HELP: float-8{ -{ $syntax "float-8{ a b c d e f g h }" } -{ $description "Literal syntax for a " { $link float-8 } "." } ; - -! ! ! double-4 - -HELP: double-4 -{ $class-description "A sequence of four double-precision floating point values. New instances can be created with " { $link double-4-with } " or " { $link double-4-boa } "." } ; - -HELP: double-4-with -{ $values { "x" float } { "simd-array" double-4 } } -{ $description "Creates a new vector with all four components equal to a scalar." } ; - -HELP: double-4-boa -{ $values { "a" float } { "b" float } { "c" float } { "d" float } { "simd-array" double-4 } } -{ $description "Creates a new vector from four scalar components." } ; - -HELP: double-4{ -{ $syntax "double-4{ a b c d }" } -{ $description "Literal syntax for a " { $link double-4 } "." } ; - ABOUT: "math.vectors.simd" diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index a3c99ae217..c5e7d6f75d 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -1,185 +1,15 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien.c-types byte-arrays cpu.architecture -kernel math math.functions math.vectors -math.vectors.simd.functor math.vectors.simd.intrinsics -math.vectors.specialization parser prettyprint.custom sequences -sequences.private locals assocs words fry ; -FROM: alien.c-types => float ; -QUALIFIED-WITH: math m +USING: alien.c-types cpu.architecture kernel +math.vectors.simd.functor vocabs.loader ; +FROM: sequences => each ; IN: math.vectors.simd << -DEFER: float-4 -DEFER: double-2 -DEFER: float-8 -DEFER: double-4 - -"double" define-simd-128 -"float" define-simd-128 -"double" define-simd-256 -"float" define-simd-256 +{ double float char uchar short ushort int uint } +[ [ define-simd-128 ] [ define-simd-256 ] bi ] each >> -: float-4-with ( x -- simd-array ) - [ 4 ] dip >float '[ _ ] \ float-4 new replicate-as ; - -: float-4-boa ( a b c d -- simd-array ) - \ float-4 new 4sequence ; - -: double-2-with ( x -- simd-array ) - [ 2 ] dip >float '[ _ ] \ double-2 new replicate-as ; - -: double-2-boa ( a b -- simd-array ) - \ double-2 new 2sequence ; - -! More efficient expansions for the above, used when SIMD is -! actually available. - -<< - -\ float-4-with [ - drop - \ (simd-broadcast) "intrinsic" word-prop [ - [ >float float-4-rep (simd-broadcast) \ float-4 boa ] - ] [ \ float-4-with def>> ] if -] "custom-inlining" set-word-prop - -\ float-4-boa [ - drop - \ (simd-gather-4) "intrinsic" word-prop [ - [| a b c d | - a >float b >float c >float d >float - float-4-rep (simd-gather-4) \ float-4 boa - ] - ] [ \ float-4-boa def>> ] if -] "custom-inlining" set-word-prop - -\ double-2-with [ - drop - \ (simd-broadcast) "intrinsic" word-prop [ - [ >float double-2-rep (simd-broadcast) \ double-2 boa ] - ] [ \ double-2-with def>> ] if -] "custom-inlining" set-word-prop - -\ double-2-boa [ - drop - \ (simd-gather-4) "intrinsic" word-prop [ - [ [ >float ] bi@ double-2-rep (simd-gather-2) \ double-2 boa ] - ] [ \ double-2-boa def>> ] if -] "custom-inlining" set-word-prop - ->> - -: float-8-with ( x -- simd-array ) - [ float-4-with ] [ float-4-with ] bi [ underlying>> ] bi@ - \ float-8 boa ; inline - -:: float-8-boa ( a b c d e f g h -- simd-array ) - a b c d float-4-boa - e f g h float-4-boa - [ underlying>> ] bi@ - \ float-8 boa ; inline - -: double-4-with ( x -- simd-array ) - [ double-2-with ] [ double-2-with ] bi [ underlying>> ] bi@ - \ double-4 boa ; inline - -:: double-4-boa ( a b c d -- simd-array ) - a b double-2-boa - c d double-2-boa - [ underlying>> ] bi@ - \ double-4 boa ; inline - -<< - - - -\ float-4 \ float-4-with m:float H{ - { v+ [ [ (simd-v+) ] float-4-vv->v-op ] } - { v- [ [ (simd-v-) ] float-4-vv->v-op ] } - { v* [ [ (simd-v*) ] float-4-vv->v-op ] } - { v/ [ [ (simd-v/) ] float-4-vv->v-op ] } - { vmin [ [ (simd-vmin) ] float-4-vv->v-op ] } - { vmax [ [ (simd-vmax) ] float-4-vv->v-op ] } - { sum [ [ (simd-sum) ] float-4-v->n-op ] } -} simd-vector-words - -\ double-2 \ double-2-with m:float H{ - { v+ [ [ (simd-v+) ] double-2-vv->v-op ] } - { v- [ [ (simd-v-) ] double-2-vv->v-op ] } - { v* [ [ (simd-v*) ] double-2-vv->v-op ] } - { v/ [ [ (simd-v/) ] double-2-vv->v-op ] } - { vmin [ [ (simd-vmin) ] double-2-vv->v-op ] } - { vmax [ [ (simd-vmax) ] double-2-vv->v-op ] } - { sum [ [ (simd-sum) ] double-2-v->n-op ] } -} simd-vector-words - -\ float-8 \ float-8-with m:float H{ - { v+ [ [ (simd-v+) ] float-8-vv->v-op ] } - { v- [ [ (simd-v-) ] float-8-vv->v-op ] } - { v* [ [ (simd-v*) ] float-8-vv->v-op ] } - { v/ [ [ (simd-v/) ] float-8-vv->v-op ] } - { vmin [ [ (simd-vmin) ] float-8-vv->v-op ] } - { vmax [ [ (simd-vmax) ] float-8-vv->v-op ] } - { sum [ [ (simd-sum) ] [ + ] float-8-v->n-op ] } -} simd-vector-words - -\ double-4 \ double-4-with m:float H{ - { v+ [ [ (simd-v+) ] double-4-vv->v-op ] } - { v- [ [ (simd-v-) ] double-4-vv->v-op ] } - { v* [ [ (simd-v*) ] double-4-vv->v-op ] } - { v/ [ [ (simd-v/) ] double-4-vv->v-op ] } - { vmin [ [ (simd-vmin) ] double-4-vv->v-op ] } - { vmax [ [ (simd-vmax) ] double-4-vv->v-op ] } - { sum [ [ (simd-v+) ] [ (simd-sum) ] double-4-v->n-op ] } -} simd-vector-words - ->> - -USE: vocabs.loader - "math.vectors.simd.alien" require diff --git a/basis/math/vectors/simd/summary.txt b/basis/math/vectors/simd/summary.txt new file mode 100644 index 0000000000..22593f1286 --- /dev/null +++ b/basis/math/vectors/simd/summary.txt @@ -0,0 +1 @@ +Single-instruction-multiple-data parallel vector operations From 47d87633406c48f1e7af4f2b5f6cdf21896ea35b Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sun, 20 Sep 2009 16:48:17 -0500 Subject: [PATCH 02/15] More integer SIMD work - move generated vocab support from specialized-arrays to vocabs.generated - add fuzz testing to math.vectors.simd - add alien type support for integer SIMD vectors - SIMD: parsing word generates a SIMD type, instead of pre-generating them all in math.vectors.simd --- .../tree/propagation/simd/simd.factor | 1 + basis/cpu/x86/x86.factor | 3 +- .../vectors/simd/alien/alien-tests.factor | 70 --- basis/math/vectors/simd/alien/alien.factor | 42 -- .../math/vectors/simd/functor/functor.factor | 60 +- basis/math/vectors/simd/simd-docs.factor | 65 ++- basis/math/vectors/simd/simd-tests.factor | 544 +++++++----------- basis/math/vectors/simd/simd.factor | 33 +- .../specialized-arrays.factor | 11 +- .../specialized-vectors.factor | 2 +- .../alien => vocabs/generated}/authors.txt | 0 basis/vocabs/generated/generated.factor | 13 + extra/benchmark/nbody-simd/nbody-simd.factor | 1 + .../raytracer-simd/raytracer-simd.factor | 1 + extra/benchmark/simd-1/simd-1.factor | 1 + 15 files changed, 346 insertions(+), 501 deletions(-) delete mode 100644 basis/math/vectors/simd/alien/alien-tests.factor delete mode 100644 basis/math/vectors/simd/alien/alien.factor rename basis/{math/vectors/simd/alien => vocabs/generated}/authors.txt (100%) create mode 100644 basis/vocabs/generated/generated.factor diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 3baa7cdcbf..42c1f35617 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -24,6 +24,7 @@ IN: compiler.tree.propagation.simd literal>> scalar-rep-of { { float-rep [ float ] } { double-rep [ float ] } + { int-rep [ integer ] } } case ] [ drop real ] if diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 4d80862ed3..322b123d99 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -417,8 +417,7 @@ M: x86 %horizontal-add-vector ( dst src rep -- ) M: x86 %horizontal-add-vector-reps { - { sse? { float-4-rep } } - { sse2? { double-2-rep short-8-rep uchar-16-rep } } + { sse3? { float-4-rep double-2-rep } } } available-reps ; M: x86 %unbox-alien ( dst src -- ) diff --git a/basis/math/vectors/simd/alien/alien-tests.factor b/basis/math/vectors/simd/alien/alien-tests.factor deleted file mode 100644 index 87540dd9a5..0000000000 --- a/basis/math/vectors/simd/alien/alien-tests.factor +++ /dev/null @@ -1,70 +0,0 @@ -USING: cpu.architecture math.vectors.simd -math.vectors.simd.intrinsics accessors math.vectors.simd.alien -kernel classes.struct tools.test compiler sequences byte-arrays -alien math kernel.private specialized-arrays combinators ; -SPECIALIZED-ARRAY: float -IN: math.vectors.simd.alien.tests - -! Vector alien intrinsics -[ float-4{ 1 2 3 4 } ] [ - [ - float-4{ 1 2 3 4 } - underlying>> 0 float-4-rep alien-vector - ] compile-call float-4 boa -] unit-test - -[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [ - 16 [ 1 ] B{ } replicate-as 16 - [ - 0 [ - { byte-array c-ptr fixnum } declare - float-4-rep set-alien-vector - ] compile-call - ] keep -] unit-test - -[ float-array{ 1 2 3 4 } ] [ - [ - float-array{ 1 2 3 4 } underlying>> - float-array{ 4 3 2 1 } clone - [ underlying>> 0 float-4-rep set-alien-vector ] keep - ] compile-call -] unit-test - -STRUCT: simd-struct -{ x float-4 } -{ y double-2 } -{ z double-4 } -{ w float-8 } ; - -[ t ] [ [ simd-struct ] compile-call >c-ptr [ 0 = ] all? ] unit-test - -[ - float-4{ 1 2 3 4 } - double-2{ 2 1 } - double-4{ 4 3 2 1 } - float-8{ 1 2 3 4 5 6 7 8 } -] [ - simd-struct - float-4{ 1 2 3 4 } >>x - double-2{ 2 1 } >>y - double-4{ 4 3 2 1 } >>z - float-8{ 1 2 3 4 5 6 7 8 } >>w - { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave -] unit-test - -[ - float-4{ 1 2 3 4 } - double-2{ 2 1 } - double-4{ 4 3 2 1 } - float-8{ 1 2 3 4 5 6 7 8 } -] [ - [ - simd-struct - float-4{ 1 2 3 4 } >>x - double-2{ 2 1 } >>y - double-4{ 4 3 2 1 } >>z - float-8{ 1 2 3 4 5 6 7 8 } >>w - { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave - ] compile-call -] unit-test diff --git a/basis/math/vectors/simd/alien/alien.factor b/basis/math/vectors/simd/alien/alien.factor deleted file mode 100644 index 1486f6d0af..0000000000 --- a/basis/math/vectors/simd/alien/alien.factor +++ /dev/null @@ -1,42 +0,0 @@ -! Copyright (C) 2009 Slava Pestov. -! See http://factorcode.org/license.txt for BSD license. -USING: alien accessors alien.c-types byte-arrays compiler.units -cpu.architecture locals kernel math math.vectors.simd -math.vectors.simd.intrinsics ; -IN: math.vectors.simd.alien - -:: define-simd-128-type ( class rep -- ) - - byte-array >>class - class >>boxed-class - [ rep alien-vector class boa ] >>getter - [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter - 16 >>size - 8 >>align - rep >>rep - class name>> typedef ; - -:: define-simd-256-type ( class rep -- ) - - class >>class - class >>boxed-class - [ - [ rep alien-vector ] - [ 16 + >fixnum rep alien-vector ] 2bi - class boa - ] >>getter - [ - [ [ underlying1>> ] 2dip rep set-alien-vector ] - [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ] - 3bi - ] >>setter - 32 >>size - 8 >>align - rep >>rep - class name>> typedef ; -[ - float-4 float-4-rep define-simd-128-type - double-2 double-2-rep define-simd-128-type - float-8 float-4-rep define-simd-256-type - double-4 double-2-rep define-simd-256-type -] with-compilation-unit diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index a97dc192be..57126f1bf8 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -5,6 +5,7 @@ effects fry functors generalizations kernel literals locals math math.functions math.vectors math.vectors.simd.intrinsics math.vectors.specialization parser prettyprint.custom sequences sequences.private strings words definitions macros cpu.architecture ; +QUALIFIED-WITH: math m IN: math.vectors.simd.functor ERROR: bad-length got expected ; @@ -51,11 +52,10 @@ MACRO: simd-boa ( rep class -- simd-array ) '[ nip _ swap supported-simd-op? ] assoc-filter '[ drop _ key? ] assoc-filter ; -:: high-level-ops ( ctor -- assoc ) +:: high-level-ops ( ctor elt-class -- assoc ) ! Some SIMD operations are defined in terms of others. { { vneg [ [ dup v- ] keep v- ] } - { v. [ v* sum ] } { n+v [ [ ctor execute ] dip v+ ] } { v+n [ ctor execute v+ ] } { n-v [ [ ctor execute ] dip v- ] } @@ -67,16 +67,36 @@ MACRO: simd-boa ( rep class -- simd-array ) { norm-sq [ dup v. assert-positive ] } { norm [ norm-sq sqrt ] } { normalize [ dup norm v/n ] } - { distance [ v- norm ] } - } ; + } + ! To compute dot product and distance with integer vectors, we + ! have to do things less efficiently, with integer overflow checks, + ! in the general case. + elt-class m:float = [ + { + { distance [ v- norm ] } + { v. [ v* sum ] } + } append + ] when ; :: simd-vector-words ( class ctor rep assoc -- ) + rep rep-component-type c-type-boxed-class :> elt-class class - rep rep-component-type c-type-boxed-class + elt-class assoc rep supported-simd-ops - ctor high-level-ops assoc-union + ctor elt-class high-level-ops assoc-union specialize-vector-words ; +:: define-simd-128-type ( class rep -- ) + + byte-array >>class + class >>boxed-class + [ rep alien-vector class boa ] >>getter + [ [ underlying>> ] 2dip rep set-alien-vector ] >>setter + 16 >>size + 8 >>align + rep >>rep + class typedef ; + FUNCTOR: define-simd-128 ( T -- ) N [ 16 T heap-size /i ] @@ -159,11 +179,35 @@ INSTANCE: A sequence { sum [ [ (simd-sum) ] \ A-v->n-op execute ] } } simd-vector-words +\ A \ A-rep define-simd-128-type + PRIVATE> ;FUNCTOR ! Synthesize 256-bit vectors from a pair of 128-bit vectors +SLOT: underlying1 +SLOT: underlying2 + +:: define-simd-256-type ( class rep -- ) + + class >>class + class >>boxed-class + [ + [ rep alien-vector ] + [ 16 + >fixnum rep alien-vector ] 2bi + class boa + ] >>getter + [ + [ [ underlying1>> ] 2dip rep set-alien-vector ] + [ [ underlying2>> ] 2dip 16 + >fixnum rep set-alien-vector ] + 3bi + ] >>setter + 32 >>size + 8 >>align + rep >>rep + class typedef ; + FUNCTOR: define-simd-256 ( T -- ) N [ 32 T heap-size /i ] @@ -235,7 +279,7 @@ M: A pprint* pprint-object ; : A-boa ( ... -- simd-array ) [ A/2-boa ] N/2 ndip A/2-boa [ underlying>> ] bi@ - \ A boa ; + \ A boa ; inline \ A-rep 2 boa-effect \ A-boa set-stack-effect @@ -260,4 +304,6 @@ INSTANCE: A sequence { sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] } } simd-vector-words +\ A \ A-rep define-simd-256-type + ;FUNCTOR diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index d6131b3a71..42512feb6f 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -17,23 +17,45 @@ $nl "There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ; ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations" -"At present, the SIMD support makes use of SSE2 and a few SSE3 instructions on x86 CPUs." +"At present, the SIMD support makes use of SSE, SSE2 and a few SSE3 instructions on x86 CPUs." $nl -"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words, decreasing performance." +"SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")." $nl -"On PowerPC, or older x86 chips without SSE2, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance." +"SSE2 introduces double-precision and integer SIMD." +$nl +"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words." +$nl +"On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance." $nl "The primities in the " { $vocab-link "math.vectors.simd.intrinsics" } " vocabulary do not have software fallbacks, but they should not be called directly in any case." ; ARTICLE: "math.vectors.simd.types" "SIMD vector types" -"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type such as " { $snippet "float" } " or " { $snippet "double" } ", and " { $snippet "count" } " is a vector dimension, such as 2, 4, or 8." +"Each SIMD vector type is named " { $snippet "scalar-count" } ", where " { $snippet "scalar" } " is a scalar C type and " { $snippet "count" } " is a vector dimension." $nl -"The following vector types are defined:" -{ $subsection float-4 } -{ $subsection double-2 } -{ $subsection float-8 } -{ $subsection double-4 } -"For each vector type, several words are defined:" +"To use a SIMD vector type, a parsing word is used to generate the relevant code and bring it into the vocabulary search path; this is the same idea as with " { $link "specialized-arrays" } ":" +{ $subsection POSTPONE: SIMD: } +"The following vector types are supported:" +{ $code + "char-16" + "uchar-16" + "char-32" + "uchar-32" + "short-8" + "ushort-8" + "short-16" + "ushort-16" + "int-4" + "uint-4" + "int-8" + "uint-8" + "float-4" + "float-8" + "double-2" + "double-4" +} ; + +ARTICLE: "math.vectors.simd.words" "SIMD vector words" +"For each SIMD vector type, several words are defined:" { $table { "Word" "Stack effect" "Description" } { { $snippet "type-with" } { $snippet "( x -- simd-array )" } "creates a new instance where all components are set to a single scalar" } @@ -41,8 +63,6 @@ $nl { { $snippet ">type" } { $snippet "( seq -- simd-array )" } "creates a new instance initialized with the elements of an existing sequence, which must have the correct length" } { { $snippet "type{" } { $snippet "type{ elements... }" } "parsing word defining literal syntax for an SIMD vector; the correct number of elements must be given" } } -"The " { $link float-4 } " and " { $link double-2 } " types correspond to 128-bit vector registers. The " { $link float-8 } " and " { $link double-4 } " types are not directly supported in hardware, and instead unbox to a pair of 128-bit vector registers." -$nl "To actually perform vector arithmetic on SIMD vectors, use " { $link "math-vectors" } " words." { $see-also "c-types-specs" } ; @@ -68,6 +88,8 @@ SYMBOLS: x y ; { $code <" USING: compiler.tree.debugger kernel.private math.vectors math.vectors.simd ; +SIMD: float-4 +IN: simd-demo : interpolate ( v a b -- w ) { float-4 float-4 float-4 } declare @@ -80,6 +102,8 @@ $nl { $code <" USING: compiler.tree.debugger hints math.vectors math.vectors.simd ; +SIMD: float-4 +IN: simd-demo : interpolate ( v a b -- w ) [ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ; @@ -94,6 +118,7 @@ $nl "In the " { $snippet "interpolate" } " word, there is still a call to the " { $link } " primitive, because the return value at the end is being boxed on the heap. In the next example, no memory allocation occurs at all because the SIMD vectors are stored inside a struct class (see " { $link "classes.struct" } "); also note the use of inlining:" { $code <" USING: compiler.tree.debugger math.vectors math.vectors.simd ; +SIMD: float-4 IN: simd-demo STRUCT: actor @@ -151,21 +176,23 @@ $nl "For the most part, the above primitives correspond directly to vector arithmetic words. They take a representation parameter, which is one of the singleton members of the " { $link vector-rep } " union in the " { $vocab-link "cpu.architecture" } " vocabulary." ; ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes" -"Struct classes may contain fields which store SIMD data; use one of the following C type names:" -{ $code -<" float-4 -double-2 -float-8 -double-4"> } -"Passing SIMD data as function parameters is not yet supported." ; +"Struct classes may contain fields which store SIMD data; for each SIMD vector type listed in " { $snippet "math.vectors.simd.types" } " there is a C type with the same name." +$nl +"Only SIMD struct fields are allowed at the moment; passing SIMD data as function parameters is not yet supported." ; ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)" "The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors." { $subsection "math.vectors.simd.intro" } { $subsection "math.vectors.simd.types" } +{ $subsection "math.vectors.simd.words" } { $subsection "math.vectors.simd.support" } { $subsection "math.vectors.simd.efficiency" } { $subsection "math.vectors.simd.alien" } { $subsection "math.vectors.simd.intrinsics" } ; +HELP: SIMD: +{ $syntax "SIMD: type-length" } +{ $values { "type" "a scalar C type" } { "length" "a vector dimension" } } +{ $description "Brings a SIMD array for holding " { $snippet "length" } " values of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ; + ABOUT: "math.vectors.simd" diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index f5318c341f..39afe3cb03 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -1,8 +1,30 @@ +USING: accessors arrays classes compiler compiler.tree.debugger +effects fry io kernel kernel.private math math.functions +math.private math.vectors math.vectors.simd +math.vectors.simd.private prettyprint random sequences system +tools.test vocabs assocs compiler.cfg.debugger words +locals math.vectors.specialization combinators cpu.architecture +math.vectors.simd.intrinsics namespaces byte-arrays alien +specialized-arrays classes.struct ; +FROM: alien.c-types => c-type-boxed-class ; +SPECIALIZED-ARRAY: float +SIMD: char-16 +SIMD: uchar-16 +SIMD: char-32 +SIMD: uchar-32 +SIMD: short-8 +SIMD: ushort-8 +SIMD: short-16 +SIMD: ushort-16 +SIMD: int-4 +SIMD: uint-4 +SIMD: int-8 +SIMD: uint-8 +SIMD: float-4 +SIMD: float-8 +SIMD: double-2 +SIMD: double-4 IN: math.vectors.simd.tests -USING: math math.vectors.simd math.vectors.simd.private -math.vectors math.functions math.private kernel.private compiler -sequences tools.test compiler.tree.debugger accessors kernel -system ; [ float-4{ 0 0 0 0 } ] [ float-4 new ] unit-test @@ -12,344 +34,6 @@ system ; [ V{ float } ] [ [ { float-4 } declare norm ] final-classes ] unit-test -[ float-4{ 12 12 12 12 } ] [ - 12 [ float-4-with ] compile-call -] unit-test - -[ float-4{ 1 2 3 4 } ] [ - 1 2 3 4 [ float-4-boa ] compile-call -] unit-test - -[ float-4{ 11 22 33 44 } ] [ - float-4{ 1 2 3 4 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v+ ] compile-call -] unit-test - -[ float-4{ -9 -18 -27 -36 } ] [ - float-4{ 1 2 3 4 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v- ] compile-call -] unit-test - -[ float-4{ 10 40 90 160 } ] [ - float-4{ 1 2 3 4 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v* ] compile-call -] unit-test - -[ float-4{ 10 100 1000 10000 } ] [ - float-4{ 100 2000 30000 400000 } float-4{ 10 20 30 40 } - [ { float-4 float-4 } declare v/ ] compile-call -] unit-test - -[ float-4{ -10 -20 -30 -40 } ] [ - float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 } - [ { float-4 float-4 } declare vmin ] compile-call -] unit-test - -[ float-4{ 10 20 30 40 } ] [ - float-4{ -10 20 -30 40 } float-4{ 10 -20 30 -40 } - [ { float-4 float-4 } declare vmax ] compile-call -] unit-test - -[ 10.0 ] [ - float-4{ 1 2 3 4 } - [ { float-4 } declare sum ] compile-call -] unit-test - -[ 13.0 ] [ - float-4{ 1 2 3 4 } - [ { float-4 } declare sum 3.0 + ] compile-call -] unit-test - -[ 8.0 ] [ - float-4{ 1 2 3 4 } float-4{ 2 0 2 0 } - [ { float-4 float-4 } declare v. ] compile-call -] unit-test - -[ float-4{ 5 10 15 20 } ] [ - 5.0 float-4{ 1 2 3 4 } - [ { float float-4 } declare n*v ] compile-call -] unit-test - -[ float-4{ 5 10 15 20 } ] [ - float-4{ 1 2 3 4 } 5.0 - [ { float float-4 } declare v*n ] compile-call -] unit-test - -[ float-4{ 10 5 2 5 } ] [ - 10.0 float-4{ 1 2 5 2 } - [ { float float-4 } declare n/v ] compile-call -] unit-test - -[ float-4{ 0.5 1 1.5 2 } ] [ - float-4{ 1 2 3 4 } 2 - [ { float float-4 } declare v/n ] compile-call -] unit-test - -[ float-4{ 1 0 0 0 } ] [ - float-4{ 10 0 0 0 } - [ { float-4 } declare normalize ] compile-call -] unit-test - -[ 30.0 ] [ - float-4{ 1 2 3 4 } - [ { float-4 } declare norm-sq ] compile-call -] unit-test - -[ t ] [ - float-4{ 1 0 0 0 } - float-4{ 0 1 0 0 } - [ { float-4 float-4 } declare distance ] compile-call - 2 sqrt 1.0e-6 ~ -] unit-test - -[ double-2{ 12 12 } ] [ - 12 [ double-2-with ] compile-call -] unit-test - -[ double-2{ 1 2 } ] [ - 1 2 [ double-2-boa ] compile-call -] unit-test - -[ double-2{ 11 22 } ] [ - double-2{ 1 2 } double-2{ 10 20 } - [ { double-2 double-2 } declare v+ ] compile-call -] unit-test - -[ double-2{ -9 -18 } ] [ - double-2{ 1 2 } double-2{ 10 20 } - [ { double-2 double-2 } declare v- ] compile-call -] unit-test - -[ double-2{ 10 40 } ] [ - double-2{ 1 2 } double-2{ 10 20 } - [ { double-2 double-2 } declare v* ] compile-call -] unit-test - -[ double-2{ 10 100 } ] [ - double-2{ 100 2000 } double-2{ 10 20 } - [ { double-2 double-2 } declare v/ ] compile-call -] unit-test - -[ double-2{ -10 -20 } ] [ - double-2{ -10 20 } double-2{ 10 -20 } - [ { double-2 double-2 } declare vmin ] compile-call -] unit-test - -[ double-2{ 10 20 } ] [ - double-2{ -10 20 } double-2{ 10 -20 } - [ { double-2 double-2 } declare vmax ] compile-call -] unit-test - -[ 3.0 ] [ - double-2{ 1 2 } - [ { double-2 } declare sum ] compile-call -] unit-test - -[ 7.0 ] [ - double-2{ 1 2 } - [ { double-2 } declare sum 4.0 + ] compile-call -] unit-test - -[ 16.0 ] [ - double-2{ 1 2 } double-2{ 2 7 } - [ { double-2 double-2 } declare v. ] compile-call -] unit-test - -[ double-2{ 5 10 } ] [ - 5.0 double-2{ 1 2 } - [ { float double-2 } declare n*v ] compile-call -] unit-test - -[ double-2{ 5 10 } ] [ - double-2{ 1 2 } 5.0 - [ { float double-2 } declare v*n ] compile-call -] unit-test - -[ double-2{ 10 5 } ] [ - 10.0 double-2{ 1 2 } - [ { float double-2 } declare n/v ] compile-call -] unit-test - -[ double-2{ 0.5 1 } ] [ - double-2{ 1 2 } 2 - [ { float double-2 } declare v/n ] compile-call -] unit-test - -[ double-2{ 0 0 } ] [ double-2 new ] unit-test - -[ double-2{ 1 0 } ] [ - double-2{ 10 0 } - [ { double-2 } declare normalize ] compile-call -] unit-test - -[ 5.0 ] [ - double-2{ 1 2 } - [ { double-2 } declare norm-sq ] compile-call -] unit-test - -[ t ] [ - double-2{ 1 0 } - double-2{ 0 1 } - [ { double-2 double-2 } declare distance ] compile-call - 2 sqrt 1.0e-6 ~ -] unit-test - -[ double-4{ 0 0 0 0 } ] [ double-4 new ] unit-test - -[ double-4{ 1 2 3 4 } ] [ - 1 2 3 4 double-4-boa -] unit-test - -[ double-4{ 1 1 1 1 } ] [ - 1 double-4-with -] unit-test - -[ double-4{ 0 1 2 3 } ] [ - 1 double-4-with [ * ] map-index -] unit-test - -[ V{ float } ] [ [ { double-4 } declare norm-sq ] final-classes ] unit-test - -[ V{ float } ] [ [ { double-4 } declare norm ] final-classes ] unit-test - -[ double-4{ 12 12 12 12 } ] [ - 12 [ double-4-with ] compile-call -] unit-test - -[ double-4{ 1 2 3 4 } ] [ - 1 2 3 4 [ double-4-boa ] compile-call -] unit-test - -[ double-4{ 11 22 33 44 } ] [ - double-4{ 1 2 3 4 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v+ ] compile-call -] unit-test - -[ double-4{ -9 -18 -27 -36 } ] [ - double-4{ 1 2 3 4 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v- ] compile-call -] unit-test - -[ double-4{ 10 40 90 160 } ] [ - double-4{ 1 2 3 4 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v* ] compile-call -] unit-test - -[ double-4{ 10 100 1000 10000 } ] [ - double-4{ 100 2000 30000 400000 } double-4{ 10 20 30 40 } - [ { double-4 double-4 } declare v/ ] compile-call -] unit-test - -[ double-4{ -10 -20 -30 -40 } ] [ - double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 } - [ { double-4 double-4 } declare vmin ] compile-call -] unit-test - -[ double-4{ 10 20 30 40 } ] [ - double-4{ -10 20 -30 40 } double-4{ 10 -20 30 -40 } - [ { double-4 double-4 } declare vmax ] compile-call -] unit-test - -[ 10.0 ] [ - double-4{ 1 2 3 4 } - [ { double-4 } declare sum ] compile-call -] unit-test - -[ 13.0 ] [ - double-4{ 1 2 3 4 } - [ { double-4 } declare sum 3.0 + ] compile-call -] unit-test - -[ 8.0 ] [ - double-4{ 1 2 3 4 } double-4{ 2 0 2 0 } - [ { double-4 double-4 } declare v. ] compile-call -] unit-test - -[ double-4{ 5 10 15 20 } ] [ - 5.0 double-4{ 1 2 3 4 } - [ { float double-4 } declare n*v ] compile-call -] unit-test - -[ double-4{ 5 10 15 20 } ] [ - double-4{ 1 2 3 4 } 5.0 - [ { float double-4 } declare v*n ] compile-call -] unit-test - -[ double-4{ 10 5 2 5 } ] [ - 10.0 double-4{ 1 2 5 2 } - [ { float double-4 } declare n/v ] compile-call -] unit-test - -[ double-4{ 0.5 1 1.5 2 } ] [ - double-4{ 1 2 3 4 } 2 - [ { float double-4 } declare v/n ] compile-call -] unit-test - -[ double-4{ 1 0 0 0 } ] [ - double-4{ 10 0 0 0 } - [ { double-4 } declare normalize ] compile-call -] unit-test - -[ 30.0 ] [ - double-4{ 1 2 3 4 } - [ { double-4 } declare norm-sq ] compile-call -] unit-test - -[ t ] [ - double-4{ 1 0 0 0 } - double-4{ 0 1 0 0 } - [ { double-4 double-4 } declare distance ] compile-call - 2 sqrt 1.0e-6 ~ -] unit-test - -[ float-8{ 0 0 0 0 0 0 0 0 } ] [ float-8 new ] unit-test - -[ float-8{ 0 0 0 0 0 0 0 0 } ] [ [ float-8 new ] compile-call ] unit-test - -[ float-8{ 1 1 1 1 1 1 1 1 } ] [ 1 float-8-with ] unit-test - -[ float-8{ 1 1 1 1 1 1 1 1 } ] [ [ 1 float-8-with ] compile-call ] unit-test - -[ float-8{ 1 2 3 4 5 6 7 8 } ] [ 1 2 3 4 5 6 7 8 float-8-boa ] unit-test - -[ float-8{ 1 2 3 4 5 6 7 8 } ] [ [ 1 2 3 4 5 6 7 8 float-8-boa ] compile-call ] unit-test - -[ float-8{ 3 6 9 12 15 18 21 24 } ] [ - float-8{ 1 2 3 4 5 6 7 8 } - float-8{ 2 4 6 8 10 12 14 16 } - [ { float-8 float-8 } declare v+ ] compile-call -] unit-test - -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ - float-8{ 1 2 3 4 5 6 7 8 } - float-8{ 2 4 6 8 10 12 14 16 } - [ { float-8 float-8 } declare v- ] compile-call -] unit-test - -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ - -0.5 - float-8{ 2 4 6 8 10 12 14 16 } - [ { float float-8 } declare n*v ] compile-call -] unit-test - -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ - float-8{ 2 4 6 8 10 12 14 16 } - -0.5 - [ { float-8 float } declare v*n ] compile-call -] unit-test - -[ float-8{ 256 128 64 32 16 8 4 2 } ] [ - 256.0 - float-8{ 1 2 4 8 16 32 64 128 } - [ { float float-8 } declare n/v ] compile-call -] unit-test - -[ float-8{ -1 -2 -3 -4 -5 -6 -7 -8 } ] [ - float-8{ 2 4 6 8 10 12 14 16 } - -2.0 - [ { float-8 float } declare v/n ] compile-call -] unit-test - ! Test puns; only on x86 cpu x86? [ [ double-2{ 4 1024 } ] [ @@ -362,3 +46,179 @@ cpu x86? [ [ { double-2 double-2 } declare v+ underlying>> 3.0 float* ] compile-call ] unit-test ] when + +! Fuzz testing +CONSTANT: simd-classes + { + char-16 + uchar-16 + char-32 + uchar-32 + short-8 + ushort-8 + short-16 + ushort-16 + int-4 + uint-4 + int-8 + uint-8 + float-4 + float-8 + double-2 + double-4 + } + +: with-ctors ( -- seq ) + simd-classes [ [ name>> "-with" append ] [ vocabulary>> ] bi lookup ] map ; + +: boa-ctors ( -- seq ) + simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ; + +: check-optimizer ( seq inputs quot -- ) + [ + [ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ] + [ [ call ] dip call ] + [ [ call ] dip compile-call ] 2tri = not + ] compose filter ; inline + +"== Checking -new constructors" print + +[ { } ] [ + simd-classes [ [ [ ] ] dip '[ _ new ] ] check-optimizer +] unit-test + +[ { } ] [ + simd-classes [ '[ _ new ] compile-call [ zero? ] all? not ] filter +] unit-test + +"== Checking -with constructors" print + +[ { } ] [ + with-ctors [ + [ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ] + ] check-optimizer +] unit-test + +"== Checking -boa constructors" print + +[ { } ] [ + boa-ctors [ + dup stack-effect in>> length + [ nip [ 1000 random ] [ ] replicate-as ] + [ fixnum swap '[ _ declare _ execute ] ] + 2bi + ] check-optimizer +] unit-test + +"== Checking vector operations" print + +: random-vector ( class -- vec ) + new [ drop 1000 random ] map ; + +:: check-vector-op ( word inputs class elt-class -- inputs quot ) + inputs [ + [ + { + { +vector+ [ class random-vector ] } + { +scalar+ [ 1000 random elt-class float = [ >float ] when ] } + } case + ] [ ] map-as + ] [ + [ + { + { +vector+ [ class ] } + { +scalar+ [ elt-class ] } + } case + ] map + ] bi + word '[ _ declare _ execute ] ; + +: ops-to-check ( elt-class -- alist ) + [ vector-words >alist ] dip float = [ + [ drop { n/v v/n v/ normalize } member? not ] assoc-filter + ] unless ; + +: check-vector-ops ( class elt-class -- ) + [ nip ops-to-check ] 2keep + '[ first2 inputs _ _ check-vector-op ] check-optimizer ; inline + +: simd-classes&reps ( -- alist ) + simd-classes [ + dup name>> [ "float" head? ] [ "double" head? ] bi or + float fixnum ? + ] { } map>assoc ; + +simd-classes&reps [ + [ [ { } ] ] 2dip '[ _ _ check-vector-ops ] unit-test +] assoc-each + +! Other regressions +[ 8000000 ] [ + int-8{ 1000 1000 1000 1000 1000 1000 1000 1000 } + [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call +] unit-test + + +! Vector alien intrinsics +[ float-4{ 1 2 3 4 } ] [ + [ + float-4{ 1 2 3 4 } + underlying>> 0 float-4-rep alien-vector + ] compile-call float-4 boa +] unit-test + +[ B{ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 } ] [ + 16 [ 1 ] B{ } replicate-as 16 + [ + 0 [ + { byte-array c-ptr fixnum } declare + float-4-rep set-alien-vector + ] compile-call + ] keep +] unit-test + +[ float-array{ 1 2 3 4 } ] [ + [ + float-array{ 1 2 3 4 } underlying>> + float-array{ 4 3 2 1 } clone + [ underlying>> 0 float-4-rep set-alien-vector ] keep + ] compile-call +] unit-test + +STRUCT: simd-struct +{ x float-4 } +{ y double-2 } +{ z double-4 } +{ w float-8 } ; + +[ t ] [ [ simd-struct ] compile-call >c-ptr [ 0 = ] all? ] unit-test + +[ + float-4{ 1 2 3 4 } + double-2{ 2 1 } + double-4{ 4 3 2 1 } + float-8{ 1 2 3 4 5 6 7 8 } +] [ + simd-struct + float-4{ 1 2 3 4 } >>x + double-2{ 2 1 } >>y + double-4{ 4 3 2 1 } >>z + float-8{ 1 2 3 4 5 6 7 8 } >>w + { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave +] unit-test + +[ + float-4{ 1 2 3 4 } + double-2{ 2 1 } + double-4{ 4 3 2 1 } + float-8{ 1 2 3 4 5 6 7 8 } +] [ + [ + simd-struct + float-4{ 1 2 3 4 } >>x + double-2{ 2 1 } >>y + double-4{ 4 3 2 1 } >>z + float-8{ 1 2 3 4 5 6 7 8 } >>w + { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave + ] compile-call +] unit-test diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index c5e7d6f75d..fe043032b8 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -1,15 +1,32 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.c-types cpu.architecture kernel -math.vectors.simd.functor vocabs.loader ; -FROM: sequences => each ; +USING: alien.c-types combinators fry kernel lexer math math.parser +math.vectors.simd.functor sequences splitting vocabs.generated +vocabs.loader vocabs.parser words ; IN: math.vectors.simd -<< +ERROR: bad-vector-size bits ; -{ double float char uchar short ushort int uint } -[ [ define-simd-128 ] [ define-simd-256 ] bi ] each +> +: simd-vocab ( type -- vocab ) + "math.vectors.simd.instances." prepend ; -"math.vectors.simd.alien" require +: parse-simd-name ( string -- c-type quot ) + "-" split1 + [ "alien.c-types" lookup dup heap-size ] [ string>number ] bi* + * 8 * { + { 128 [ [ define-simd-128 ] ] } + { 256 [ [ define-simd-256 ] ] } + [ bad-vector-size ] + } case ; + +PRIVATE> + +: define-simd-vocab ( type -- vocab ) + [ simd-vocab ] + [ '[ _ parse-simd-name call( type -- ) ] ] bi + generate-vocab ; + +SYNTAX: SIMD: + scan define-simd-vocab use-vocab ; diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor index 6931c83677..a64d052fd1 100755 --- a/basis/specialized-arrays/specialized-arrays.factor +++ b/basis/specialized-arrays/specialized-arrays.factor @@ -4,7 +4,7 @@ USING: accessors alien alien.c-types alien.data alien.parser assocs byte-arrays classes compiler.units functors kernel lexer libc math math.vectors.specialization namespaces parser prettyprint.custom sequences sequences.private strings summary vocabs vocabs.loader -vocabs.parser words fry combinators ; +vocabs.parser vocabs.generated words fry combinators ; IN: specialized-arrays MIXIN: specialized-array @@ -123,15 +123,6 @@ M: word (underlying-type) "c-type" word-prop ; PRIVATE> -: generate-vocab ( vocab-name quot -- vocab ) - [ dup vocab [ ] ] dip '[ - [ - [ - _ with-current-vocab - ] with-compilation-unit - ] keep - ] ?if ; inline - : define-array-vocab ( type -- vocab ) underlying-type-name [ specialized-array-vocab ] [ '[ _ define-array ] ] bi diff --git a/basis/specialized-vectors/specialized-vectors.factor b/basis/specialized-vectors/specialized-vectors.factor index 58fb97764b..7cda026cb3 100644 --- a/basis/specialized-vectors/specialized-vectors.factor +++ b/basis/specialized-vectors/specialized-vectors.factor @@ -3,7 +3,7 @@ USING: accessors alien.c-types assocs compiler.units functors growable kernel lexer namespaces parser prettyprint.custom sequences specialized-arrays specialized-arrays.private strings -vocabs vocabs.parser fry ; +vocabs vocabs.parser vocabs.generated fry ; QUALIFIED: vectors.functor IN: specialized-vectors diff --git a/basis/math/vectors/simd/alien/authors.txt b/basis/vocabs/generated/authors.txt similarity index 100% rename from basis/math/vectors/simd/alien/authors.txt rename to basis/vocabs/generated/authors.txt diff --git a/basis/vocabs/generated/generated.factor b/basis/vocabs/generated/generated.factor new file mode 100644 index 0000000000..1ddcc73db2 --- /dev/null +++ b/basis/vocabs/generated/generated.factor @@ -0,0 +1,13 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: compiler.units fry kernel vocabs vocabs.parser ; +IN: vocabs.generated + +: generate-vocab ( vocab-name quot -- vocab ) + [ dup vocab [ ] ] dip '[ + [ + [ + _ with-current-vocab + ] with-compilation-unit + ] keep + ] ?if ; inline diff --git a/extra/benchmark/nbody-simd/nbody-simd.factor b/extra/benchmark/nbody-simd/nbody-simd.factor index e8bef58923..3aedffed91 100644 --- a/extra/benchmark/nbody-simd/nbody-simd.factor +++ b/extra/benchmark/nbody-simd/nbody-simd.factor @@ -4,6 +4,7 @@ USING: accessors fry kernel locals math math.constants math.functions math.vectors math.vectors.simd prettyprint combinators.smart sequences hints classes.struct specialized-arrays ; +SIMD: double-4 IN: benchmark.nbody-simd : solar-mass ( -- x ) 4 pi sq * ; inline diff --git a/extra/benchmark/raytracer-simd/raytracer-simd.factor b/extra/benchmark/raytracer-simd/raytracer-simd.factor index 3712972862..2d16c8cd1f 100644 --- a/extra/benchmark/raytracer-simd/raytracer-simd.factor +++ b/extra/benchmark/raytracer-simd/raytracer-simd.factor @@ -5,6 +5,7 @@ USING: arrays accessors io io.files io.files.temp io.encodings.binary kernel math math.constants math.functions math.vectors math.vectors.simd math.parser make sequences sequences.private words hints classes.struct ; +SIMD: double-4 IN: benchmark.raytracer-simd ! parameters diff --git a/extra/benchmark/simd-1/simd-1.factor b/extra/benchmark/simd-1/simd-1.factor index 4f57cca0bb..1e753a331d 100644 --- a/extra/benchmark/simd-1/simd-1.factor +++ b/extra/benchmark/simd-1/simd-1.factor @@ -2,6 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel io math math.functions math.parser math.vectors math.vectors.simd sequences specialized-arrays ; +SIMD: float-4 SPECIALIZED-ARRAY: float-4 IN: benchmark.simd-1 From acea55c692ce994311287d8c3fe6466d70d23bb3 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sun, 20 Sep 2009 17:43:16 -0500 Subject: [PATCH 03/15] math.vectors: add v+- word which is accelerated by SSE3 --- .../cfg/instructions/instructions.factor | 5 +++ .../compiler/cfg/intrinsics/intrinsics.factor | 1 + .../cfg/two-operand/two-operand.factor | 1 + basis/compiler/codegen/codegen.factor | 1 + .../tree/propagation/simd/simd.factor | 37 ++++++++----------- basis/cpu/architecture/architecture.factor | 2 + basis/cpu/x86/32/32.factor | 2 +- basis/cpu/x86/64/64.factor | 2 +- basis/cpu/x86/x86.factor | 22 ++++++++--- .../math/vectors/simd/functor/functor.factor | 3 ++ .../vectors/simd/intrinsics/intrinsics.factor | 2 + basis/math/vectors/simd/simd-docs.factor | 1 + .../specialization/specialization.factor | 1 + basis/math/vectors/vectors-docs.factor | 12 ++++++ basis/math/vectors/vectors-tests.factor | 4 +- basis/math/vectors/vectors.factor | 5 +++ 16 files changed, 71 insertions(+), 30 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 32e5d46c61..63297b9bdf 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -310,6 +310,11 @@ def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##add-sub-vector +def: dst +use: src1 src2 +literal: rep ; + PURE-INSN: ##mul-vector def: dst use: src1 src2 diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 0daab82395..5b3fd1b324 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -155,6 +155,7 @@ IN: compiler.cfg.intrinsics { { math.vectors.simd.intrinsics:assert-positive [ drop ] } { math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } diff --git a/basis/compiler/cfg/two-operand/two-operand.factor b/basis/compiler/cfg/two-operand/two-operand.factor index 20fa1d0b18..c275756046 100644 --- a/basis/compiler/cfg/two-operand/two-operand.factor +++ b/basis/compiler/cfg/two-operand/two-operand.factor @@ -48,6 +48,7 @@ UNION: two-operand-insn ##max-float ##add-vector ##sub-vector + ##add-sub-vector ##mul-vector ##div-vector ##min-vector diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index ddf5aa0e02..14246a3fbf 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -170,6 +170,7 @@ CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##box-vector %box-vector CODEGEN: ##add-vector %add-vector CODEGEN: ##sub-vector %sub-vector +CODEGEN: ##add-sub-vector %add-sub-vector CODEGEN: ##mul-vector %mul-vector CODEGEN: ##div-vector %div-vector CODEGEN: ##min-vector %min-vector diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 42c1f35617..db39985c94 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -1,23 +1,24 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors byte-arrays combinators fry +USING: accessors byte-arrays combinators fry sequences compiler.tree.propagation.info cpu.architecture kernel words math math.intervals math.vectors.simd.intrinsics ; IN: compiler.tree.propagation.simd -\ (simd-v+) { byte-array } "default-output-classes" set-word-prop - -\ (simd-v-) { byte-array } "default-output-classes" set-word-prop - -\ (simd-v*) { byte-array } "default-output-classes" set-word-prop - -\ (simd-v/) { byte-array } "default-output-classes" set-word-prop - -\ (simd-vmin) { byte-array } "default-output-classes" set-word-prop - -\ (simd-vmax) { byte-array } "default-output-classes" set-word-prop - -\ (simd-vsqrt) { byte-array } "default-output-classes" set-word-prop +{ + (simd-v+) + (simd-v-) + (simd-v+-) + (simd-v*) + (simd-v/) + (simd-vmin) + (simd-vmax) + (simd-vsqrt) + (simd-broadcast) + (simd-gather-2) + (simd-gather-4) + alien-vector +} [ { byte-array } "default-output-classes" set-word-prop ] each \ (simd-sum) [ nip dup literal?>> [ @@ -30,18 +31,10 @@ IN: compiler.tree.propagation.simd ] "outputs" set-word-prop -\ (simd-broadcast) { byte-array } "default-output-classes" set-word-prop - -\ (simd-gather-2) { byte-array } "default-output-classes" set-word-prop - -\ (simd-gather-4) { byte-array } "default-output-classes" set-word-prop - \ assert-positive [ real [0,inf] value-info-intersect ] "outputs" set-word-prop -\ alien-vector { byte-array } "default-output-classes" set-word-prop - ! If SIMD is not available, inline alien-vector and set-alien-vector ! to get a speedup : inline-unless-intrinsic ( word -- ) diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 61e4e2df37..331d459adf 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -182,6 +182,7 @@ HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- ) HOOK: %sub-vector cpu ( dst src1 src2 rep -- ) +HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- ) HOOK: %mul-vector cpu ( dst src1 src2 rep -- ) HOOK: %div-vector cpu ( dst src1 src2 rep -- ) HOOK: %min-vector cpu ( dst src1 src2 rep -- ) @@ -194,6 +195,7 @@ HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps ) HOOK: %sub-vector-reps cpu ( -- reps ) +HOOK: %add-sub-vector-reps cpu ( -- reps ) HOOK: %mul-vector-reps cpu ( -- reps ) HOOK: %div-vector-reps cpu ( -- reps ) HOOK: %min-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor index 9939154512..172b500cd5 100755 --- a/basis/cpu/x86/32/32.factor +++ b/basis/cpu/x86/32/32.factor @@ -295,4 +295,4 @@ os windows? [ 4 "double" c-type (>>align) ] unless -"cpu.x86.features" require +check-sse diff --git a/basis/cpu/x86/64/64.factor b/basis/cpu/x86/64/64.factor index f4018b1508..3958ba5ec8 100644 --- a/basis/cpu/x86/64/64.factor +++ b/basis/cpu/x86/64/64.factor @@ -228,4 +228,4 @@ USE: vocabs.loader { [ os winnt? ] [ "cpu.x86.64.winnt" require ] } } cond -"cpu.x86.features" require +check-sse diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 322b123d99..a132947cf1 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -341,6 +341,17 @@ M: x86 %sub-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; +M: x86 %add-sub-vector ( dst src1 src2 rep -- ) + { + { float-4-rep [ ADDSUBPS ] } + { double-2-rep [ ADDSUBPD ] } + } case drop ; + +M: x86 %add-sub-vector-reps + { + { sse3? { float-4-rep double-2-rep } } + } available-reps ; + M: x86 %mul-vector ( dst src1 src2 rep -- ) { { float-4-rep [ MULPS ] } @@ -879,9 +890,10 @@ enable-min/max { 42 [ enable-sse3 ] } } case ; -[ { sse_version } compile ] with-optimizer +: check-sse ( -- ) + [ { sse_version } compile ] with-optimizer -"Checking for multimedia extensions: " write sse-version 30 min -[ sse-string write " detected" print ] -[ install-sse-check ] -[ enable-sse ] tri + "Checking for multimedia extensions: " write sse-version 30 min + [ sse-string write " detected" print ] + [ install-sse-check ] + [ enable-sse ] tri ; diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 57126f1bf8..2141914d1c 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -42,6 +42,7 @@ MACRO: simd-boa ( rep class -- simd-array ) { { v+ (simd-v+) } { v- (simd-v-) } + { v+- (simd-v+-) } { v* (simd-v*) } { v/ (simd-v/) } { vmin (simd-vmin) } @@ -171,6 +172,7 @@ INSTANCE: A sequence \ A \ A-with \ A-rep H{ { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } + { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } @@ -297,6 +299,7 @@ INSTANCE: A sequence \ A \ A-with \ A-rep H{ { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } + { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index a7d019af81..6d39b9e70a 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -8,6 +8,7 @@ IN: math.vectors.simd.intrinsics ERROR: bad-simd-call ; : (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ; +: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ; @@ -67,6 +68,7 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) M: vector-rep supported-simd-op? { { \ (simd-v+) [ %add-vector-reps ] } + { \ (simd-v+-) [ %add-sub-vector-reps ] } { \ (simd-v-) [ %sub-vector-reps ] } { \ (simd-v*) [ %mul-vector-reps ] } { \ (simd-v/) [ %div-vector-reps ] } diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index 42512feb6f..d108d70b26 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -162,6 +162,7 @@ $nl "It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "." { $subsection (simd-v+) } { $subsection (simd-v-) } +{ $subsection (simd-v+-) } { $subsection (simd-v/) } { $subsection (simd-vmin) } { $subsection (simd-vmax) } diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 21ec9f64f3..1a85f5ade7 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -55,6 +55,7 @@ H{ { v* { +vector+ +vector+ -> +vector+ } } { v*n { +vector+ +scalar+ -> +vector+ } } { v+ { +vector+ +vector+ -> +vector+ } } + { v+- { +vector+ +vector+ -> +vector+ } } { v+n { +vector+ +scalar+ -> +vector+ } } { v- { +vector+ +vector+ -> +vector+ } } { v-n { +vector+ +scalar+ -> +vector+ } } diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index 7456597278..4f2f093216 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -17,6 +17,7 @@ $nl "Combining two vectors to form another vector with " { $link 2map } ":" { $subsection v+ } { $subsection v- } +{ $subsection v+- } { $subsection v* } { $subsection v/ } { $subsection vmax } @@ -57,6 +58,17 @@ HELP: v- { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } { $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise." } ; +HELP: v+- +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Adds and subtracts alternate elements of " { $snippet "v" } " and " { $snippet "u" } " component-wise." } +{ $examples + { $example + "USING: math.vectors prettyprint ;" + "{ 1 2 3 } { 2 3 2 } v+- ." + "{ -1 5 1 }" + } +} ; + HELP: [v-] { $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } { $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise; any components which become negative are set to zero." } ; diff --git a/basis/math/vectors/vectors-tests.factor b/basis/math/vectors/vectors-tests.factor index 3e56644d3e..fc482815a9 100644 --- a/basis/math/vectors/vectors-tests.factor +++ b/basis/math/vectors/vectors-tests.factor @@ -17,4 +17,6 @@ USING: math.vectors tools.test ; [ 1.125 ] [ 0.0 1.0 2.0 4.0 { 0.5 0.25 } bilerp ] unit-test -[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test \ No newline at end of file +[ 17 ] [ 0 1 2 3 4 5 6 7 { 1 2 3 } trilerp ] unit-test + +[ { 0 3 2 5 4 } ] [ { 1 2 3 4 5 } { 1 1 1 1 1 } v+- ] unit-test \ No newline at end of file diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index dd48525b53..deda1dc505 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -24,6 +24,11 @@ IN: math.vectors : vmax ( u v -- w ) [ max ] 2map ; : vmin ( u v -- w ) [ min ] 2map ; +: v+- ( u v -- w ) + [ t ] 2dip + [ [ not ] 2dip pick [ + ] [ - ] if ] 2map + nip ; + : vfloor ( v -- _v_ ) [ floor ] map ; : vceiling ( v -- ^v^ ) [ ceiling ] map ; : vtruncate ( v -- -v- ) [ truncate ] map ; From ea44ea3522591ac1d4a4efb70a7be0bdcb940cdc Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sun, 20 Sep 2009 23:16:02 -0500 Subject: [PATCH 04/15] math.vectors.simd: add saturated arithmetic operations --- basis/alien/c-types/c-types-tests.factor | 9 +- basis/alien/c-types/c-types.factor | 25 ++++-- .../cfg/instructions/instructions.factor | 17 +++- .../compiler/cfg/intrinsics/intrinsics.factor | 11 ++- .../cfg/two-operand/two-operand.factor | 5 +- basis/compiler/codegen/codegen.factor | 5 +- .../known-words/known-words.factor | 13 +-- basis/cpu/architecture/architecture.factor | 10 ++- basis/cpu/x86/x86.factor | 89 +++++++++++-------- .../math/vectors/simd/functor/functor.factor | 19 +++- .../vectors/simd/intrinsics/intrinsics.factor | 8 +- basis/math/vectors/simd/simd-docs.factor | 6 +- .../specialization/specialization.factor | 3 + basis/math/vectors/vectors-docs.factor | 36 +++++++- basis/math/vectors/vectors.factor | 13 ++- .../specialized-arrays-tests.factor | 5 +- .../specialized-arrays.factor | 31 ++++--- vm/cpu-x86.S | 4 +- 18 files changed, 220 insertions(+), 89 deletions(-) diff --git a/basis/alien/c-types/c-types-tests.factor b/basis/alien/c-types/c-types-tests.factor index 792e7d416a..a893ffebe8 100644 --- a/basis/alien/c-types/c-types-tests.factor +++ b/basis/alien/c-types/c-types-tests.factor @@ -1,5 +1,6 @@ USING: alien alien.syntax alien.c-types kernel tools.test -sequences system libc alien.strings io.encodings.utf8 ; +sequences system libc alien.strings io.encodings.utf8 +math.constants ; IN: alien.c-types.tests CONSTANT: xyz 123 @@ -52,3 +53,9 @@ TYPEDEF: uchar* MyLPBYTE os windows? cpu x86.64? and [ [ -2147467259 ] [ 2147500037 *long ] unit-test ] when + +[ 0 ] [ -10 uchar c-type-clamp ] unit-test +[ 12 ] [ 12 uchar c-type-clamp ] unit-test +[ -10 ] [ -10 char c-type-clamp ] unit-test +[ 127 ] [ 230 char c-type-clamp ] unit-test +[ t ] [ pi dup float c-type-clamp = ] unit-test diff --git a/basis/alien/c-types/c-types.factor b/basis/alien/c-types/c-types.factor index afbb664fed..1ad4f75a3c 100755 --- a/basis/alien/c-types/c-types.factor +++ b/basis/alien/c-types/c-types.factor @@ -1,11 +1,12 @@ ! Copyright (C) 2004, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: byte-arrays arrays assocs kernel kernel.private math -namespaces make parser sequences strings words splitting math.parser -cpu.architecture alien alien.accessors alien.strings quotations -layouts system compiler.units io io.files io.encodings.binary -io.streams.memory accessors combinators effects continuations fry -classes vocabs vocabs.loader words.symbol ; +math.order math.parser namespaces make parser sequences strings +words splitting cpu.architecture alien alien.accessors +alien.strings quotations layouts system compiler.units io +io.files io.encodings.binary io.streams.memory accessors +combinators effects continuations fry classes vocabs +vocabs.loader words.symbol ; QUALIFIED: math IN: alien.c-types @@ -480,3 +481,17 @@ M: int-4-rep rep-component-type drop int ; M: uint-4-rep rep-component-type drop uint ; M: float-4-rep rep-component-type drop float ; M: double-2-rep rep-component-type drop double ; + +: (unsigned-interval) ( bytes -- from to ) [ 0 ] dip 8 * 2^ 1 - ; foldable +: unsigned-interval ( c-type -- from to ) heap-size (unsigned-interval) ; foldable +: (signed-interval) ( bytes -- from to ) 8 * 1 - 2^ [ neg ] [ 1 - ] bi ; foldable +: signed-interval ( c-type -- from to ) heap-size (signed-interval) ; foldable + +: c-type-interval ( c-type -- from to ) + { + { [ dup { float double } memq? ] [ drop -1/0. 1/0. ] } + { [ dup { char short int long longlong } memq? ] [ signed-interval ] } + { [ dup { uchar ushort uint ulong ulonglong } memq? ] [ unsigned-interval ] } + } cond ; foldable + +: c-type-clamp ( value c-type -- value' ) c-type-interval clamp ; inline diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 63297b9bdf..0be42a24ab 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -305,7 +305,7 @@ def: dst use: src1 src2 literal: rep ; -PURE-INSN: ##sub-vector +PURE-INSN: ##saturated-add-vector def: dst use: src1 src2 literal: rep ; @@ -315,11 +315,26 @@ def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##sub-vector +def: dst +use: src1 src2 +literal: rep ; + +PURE-INSN: ##saturated-sub-vector +def: dst +use: src1 src2 +literal: rep ; + PURE-INSN: ##mul-vector def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##saturated-mul-vector +def: dst +use: src1 src2 +literal: rep ; + PURE-INSN: ##div-vector def: dst use: src1 src2 diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 5b3fd1b324..b9835827fa 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -151,13 +151,16 @@ IN: compiler.cfg.intrinsics { math.integers.private:fixnum-log2 [ drop emit-fixnum-log2 ] } } enable-intrinsics ; -: enable-sse2-simd ( -- ) +: enable-simd ( -- ) { { math.vectors.simd.intrinsics:assert-positive [ drop ] } { math.vectors.simd.intrinsics:(simd-v+) [ [ ^^add-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vs+) [ [ ^^saturated-add-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v+-) [ [ ^^add-sub-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v-) [ [ ^^sub-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vs-) [ [ ^^saturated-sub-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v*) [ [ ^^mul-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vs*) [ [ ^^saturated-mul-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] } @@ -165,14 +168,10 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } + { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } { math.vectors.simd.intrinsics:set-alien-vector [ emit-set-alien-vector ] } } enable-intrinsics ; -: enable-sse3-simd ( -- ) - { - { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } - } enable-intrinsics ; - : emit-intrinsic ( node word -- ) "intrinsic" word-prop call( node -- ) ; diff --git a/basis/compiler/cfg/two-operand/two-operand.factor b/basis/compiler/cfg/two-operand/two-operand.factor index c275756046..1186e6b41f 100644 --- a/basis/compiler/cfg/two-operand/two-operand.factor +++ b/basis/compiler/cfg/two-operand/two-operand.factor @@ -47,9 +47,12 @@ UNION: two-operand-insn ##min-float ##max-float ##add-vector - ##sub-vector + ##saturated-add-vector ##add-sub-vector + ##sub-vector + ##saturated-sub-vector ##mul-vector + ##saturated-mul-vector ##div-vector ##min-vector ##max-vector ; diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 14246a3fbf..06a16663cb 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -169,9 +169,12 @@ CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##box-vector %box-vector CODEGEN: ##add-vector %add-vector -CODEGEN: ##sub-vector %sub-vector +CODEGEN: ##saturated-add-vector %saturated-add-vector CODEGEN: ##add-sub-vector %add-sub-vector +CODEGEN: ##sub-vector %sub-vector +CODEGEN: ##saturated-sub-vector %saturated-sub-vector CODEGEN: ##mul-vector %mul-vector +CODEGEN: ##saturated-mul-vector %saturated-mul-vector CODEGEN: ##div-vector %div-vector CODEGEN: ##min-vector %min-vector CODEGEN: ##max-vector %max-vector diff --git a/basis/compiler/tree/propagation/known-words/known-words.factor b/basis/compiler/tree/propagation/known-words/known-words.factor index 621b8d082b..d4780b335b 100644 --- a/basis/compiler/tree/propagation/known-words/known-words.factor +++ b/basis/compiler/tree/propagation/known-words/known-words.factor @@ -18,6 +18,7 @@ compiler.tree.propagation.constraints compiler.tree.propagation.call-effect compiler.tree.propagation.transforms compiler.tree.propagation.simd ; +FROM: alien.c-types => (signed-interval) (unsigned-interval) ; IN: compiler.tree.propagation.known-words { + - * / } @@ -260,15 +261,9 @@ generic-comparison-ops [ alien-unsigned-8 } [ dup name>> { - { - [ "alien-signed-" ?head ] - [ string>number 8 * 1 - 2^ dup neg swap 1 - [a,b] ] - } - { - [ "alien-unsigned-" ?head ] - [ string>number 8 * 2^ 1 - 0 swap [a,b] ] - } - } cond + { [ "alien-signed-" ?head ] [ string>number (signed-interval) ] } + { [ "alien-unsigned-" ?head ] [ string>number (unsigned-interval) ] } + } cond [a,b] [ fits-in-fixnum? fixnum integer ? ] keep '[ 2drop _ ] "outputs" set-word-prop ] each diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 331d459adf..29e58cb163 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -181,9 +181,12 @@ HOOK: %broadcast-vector cpu ( dst src rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- ) -HOOK: %sub-vector cpu ( dst src1 src2 rep -- ) +HOOK: %saturated-add-vector cpu ( dst src1 src2 rep -- ) HOOK: %add-sub-vector cpu ( dst src1 src2 rep -- ) +HOOK: %sub-vector cpu ( dst src1 src2 rep -- ) +HOOK: %saturated-sub-vector cpu ( dst src1 src2 rep -- ) HOOK: %mul-vector cpu ( dst src1 src2 rep -- ) +HOOK: %saturated-mul-vector cpu ( dst src1 src2 rep -- ) HOOK: %div-vector cpu ( dst src1 src2 rep -- ) HOOK: %min-vector cpu ( dst src1 src2 rep -- ) HOOK: %max-vector cpu ( dst src1 src2 rep -- ) @@ -194,9 +197,12 @@ HOOK: %broadcast-vector-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps ) -HOOK: %sub-vector-reps cpu ( -- reps ) +HOOK: %saturated-add-vector-reps cpu ( -- reps ) HOOK: %add-sub-vector-reps cpu ( -- reps ) +HOOK: %sub-vector-reps cpu ( -- reps ) +HOOK: %saturated-sub-vector-reps cpu ( -- reps ) HOOK: %mul-vector-reps cpu ( -- reps ) +HOOK: %saturated-mul-vector-reps cpu ( -- reps ) HOOK: %div-vector-reps cpu ( -- reps ) HOOK: %min-vector-reps cpu ( -- reps ) HOOK: %max-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index a132947cf1..62caf1ffe9 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -323,6 +323,30 @@ M: x86 %add-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; +M: x86 %saturated-add-vector ( dst src1 src2 rep -- ) + { + { char-16-rep [ PADDSB ] } + { uchar-16-rep [ PADDUSB ] } + { short-8-rep [ PADDSW ] } + { ushort-8-rep [ PADDUSW ] } + } case drop ; + +M: x86 %saturated-add-vector-reps + { + { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } } + } available-reps ; + +M: x86 %add-sub-vector ( dst src1 src2 rep -- ) + { + { float-4-rep [ ADDSUBPS ] } + { double-2-rep [ ADDSUBPD ] } + } case drop ; + +M: x86 %add-sub-vector-reps + { + { sse3? { float-4-rep double-2-rep } } + } available-reps ; + M: x86 %sub-vector ( dst src1 src2 rep -- ) { { float-4-rep [ SUBPS ] } @@ -341,15 +365,17 @@ M: x86 %sub-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; -M: x86 %add-sub-vector ( dst src1 src2 rep -- ) +M: x86 %saturated-sub-vector ( dst src1 src2 rep -- ) { - { float-4-rep [ ADDSUBPS ] } - { double-2-rep [ ADDSUBPD ] } + { char-16-rep [ PSUBSB ] } + { uchar-16-rep [ PSUBUSB ] } + { short-8-rep [ PSUBSW ] } + { ushort-8-rep [ PSUBUSW ] } } case drop ; -M: x86 %add-sub-vector-reps +M: x86 %saturated-sub-vector-reps { - { sse3? { float-4-rep double-2-rep } } + { sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } } } available-reps ; M: x86 %mul-vector ( dst src1 src2 rep -- ) @@ -368,6 +394,10 @@ M: x86 %mul-vector-reps { sse2? { double-2-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; +M: x86 %saturated-mul-vector-reps + ! No multiplication with saturation on x86 + { } ; + M: x86 %div-vector ( dst src1 src2 rep -- ) { { float-4-rep [ DIVPS ] } @@ -854,46 +884,29 @@ M: x86 small-enough? ( n -- ? ) #! set up by the caller. stack-frame get total-size>> + stack@ ; -: enable-sse2 ( -- ) - enable-float-intrinsics - enable-fsqrt - enable-float-min/max - enable-sse2-simd ; - -: enable-sse3 ( -- ) - enable-sse2 - enable-sse3-simd ; - -enable-min/max - -:: install-sse-check ( version -- ) +:: install-sse2-check ( -- ) [ - sse-version version < [ - "This image was built to use " write - version sse-string write - " but your CPU only supports " write - sse-version sse-string write "." print + sse-version 20 < [ + "This image was built to use SSE2 but your CPU does not support it." print "You will need to bootstrap Factor again." print flush 1 exit ] when ] "cpu.x86" add-init-hook ; -: enable-sse ( version -- ) - { - { 00 [ ] } - { 10 [ ] } - { 20 [ enable-sse2 ] } - { 30 [ enable-sse3 ] } - { 33 [ enable-sse3 ] } - { 41 [ enable-sse3 ] } - { 42 [ enable-sse3 ] } - } case ; +: enable-sse2 ( version -- ) + 20 >= [ + enable-float-intrinsics + enable-fsqrt + enable-float-min/max + install-sse2-check + ] when ; + + +enable-simd +enable-min/max : check-sse ( -- ) [ { sse_version } compile ] with-optimizer - - "Checking for multimedia extensions: " write sse-version 30 min - [ sse-string write " detected" print ] - [ install-sse-check ] - [ enable-sse ] tri ; + "Checking for multimedia extensions: " write sse-version + [ sse-string write " detected" print ] [ enable-sse2 ] bi ; diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 2141914d1c..7d84b18225 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -41,9 +41,12 @@ MACRO: simd-boa ( rep class -- simd-array ) [ { { v+ (simd-v+) } - { v- (simd-v-) } + { vs+ (simd-vs+) } { v+- (simd-v+-) } + { v- (simd-v-) } + { vs- (simd-vs-) } { v* (simd-v*) } + { vs* (simd-vs*) } { v/ (simd-v/) } { vmin (simd-vmin) } { vmax (simd-vmax) } @@ -111,7 +114,7 @@ A{ DEFINES ${A}{ NTH [ T dup c-type-getter-boxer array-accessor ] SET-NTH [ T dup c-setter array-accessor ] -A-rep IS ${A}-rep +A-rep [ A name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op @@ -142,6 +145,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ; M: A byte-length underlying>> length ; inline +M: A element-type drop A-rep rep-component-type ; + M: A pprint-delims drop \ A{ \ } ; M: A >pprint-sequence ; @@ -172,9 +177,12 @@ INSTANCE: A sequence \ A \ A-with \ A-rep H{ { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } + { vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] } { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } + { vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] } { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } + { vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] } { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] } @@ -227,7 +235,7 @@ A{ DEFINES ${A}{ A-deref DEFINES-PRIVATE ${A}-deref -A-rep IS ${A/2}-rep +A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op @@ -267,6 +275,8 @@ M: A equal? over \ A instance? [ sequence= ] [ 2drop f ] if ; M: A byte-length drop 32 ; inline +M: A element-type drop A-rep rep-component-type ; + SYNTAX: A{ \ } [ >A ] parse-literal ; M: A pprint-delims drop \ A{ \ } ; @@ -298,9 +308,12 @@ INSTANCE: A sequence \ A \ A-with \ A-rep H{ { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } + { vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] } { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } + { vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] } { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } + { vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] } { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] } diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 6d39b9e70a..63214f7da6 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -8,8 +8,11 @@ IN: math.vectors.simd.intrinsics ERROR: bad-simd-call ; : (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ; +: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ; +: (simd-vs+) ( v1 v2 rep -- v3 ) bad-simd-call ; +: (simd-vs-) ( v1 v2 rep -- v3 ) bad-simd-call ; +: (simd-vs*) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ; : (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ; @@ -68,9 +71,12 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) M: vector-rep supported-simd-op? { { \ (simd-v+) [ %add-vector-reps ] } + { \ (simd-vs+) [ %saturated-add-vector-reps ] } { \ (simd-v+-) [ %add-sub-vector-reps ] } { \ (simd-v-) [ %sub-vector-reps ] } + { \ (simd-vs-) [ %saturated-sub-vector-reps ] } { \ (simd-v*) [ %mul-vector-reps ] } + { \ (simd-vs*) [ %saturated-mul-vector-reps ] } { \ (simd-v/) [ %div-vector-reps ] } { \ (simd-vmin) [ %min-vector-reps ] } { \ (simd-vmax) [ %max-vector-reps ] } diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index d108d70b26..ef625ffff0 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -161,8 +161,12 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives" $nl "It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "." { $subsection (simd-v+) } -{ $subsection (simd-v-) } +{ $subsection (simd-vs+) } { $subsection (simd-v+-) } +{ $subsection (simd-v-) } +{ $subsection (simd-vs-) } +{ $subsection (simd-v*) } +{ $subsection (simd-vs*) } { $subsection (simd-v/) } { $subsection (simd-vmin) } { $subsection (simd-vmax) } diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 1a85f5ade7..2fb36d428a 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -53,11 +53,14 @@ H{ { norm-sq { +vector+ -> +nonnegative+ } } { normalize { +vector+ -> +vector+ } } { v* { +vector+ +vector+ -> +vector+ } } + { vs* { +vector+ +vector+ -> +vector+ } } { v*n { +vector+ +scalar+ -> +vector+ } } { v+ { +vector+ +vector+ -> +vector+ } } + { vs+ { +vector+ +vector+ -> +vector+ } } { v+- { +vector+ +vector+ -> +vector+ } } { v+n { +vector+ +scalar+ -> +vector+ } } { v- { +vector+ +vector+ -> +vector+ } } + { vs- { +vector+ +vector+ -> +vector+ } } { v-n { +vector+ +scalar+ -> +vector+ } } { v. { +vector+ +vector+ -> +scalar+ } } { v/ { +vector+ +vector+ -> +vector+ } } diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index 4f2f093216..ce635b6d60 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -26,7 +26,11 @@ $nl { $subsection v. } { $subsection norm } { $subsection norm-sq } -{ $subsection normalize } ; +{ $subsection normalize } +"Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:" +{ $subsection vs+ } +{ $subsection vs- } +{ $subsection vs* } ; ABOUT: "math-vectors" @@ -100,6 +104,34 @@ HELP: v. { $snippet "0 [ conjugate * + ] 2reduce" } } ; +HELP: vs+ +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Adds " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } +{ $examples + "With saturation:" + { $example + "USING: math.vectors prettyprint specialized-arrays ;" + "SPECIALIZED-ARRAY: uchar" + "uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } vs+ ." + "uchar-array{ 170 255 220 }" + } + "Without saturation:" + { $example + "USING: math.vectors prettyprint specialized-arrays ;" + "SPECIALIZED-ARRAY: uchar" + "uchar-array{ 100 200 150 } uchar-array{ 70 70 70 } v+ ." + "uchar-array{ 170 14 220 }" + } +} ; + +HELP: vs- +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Subtracts " { $snippet "v" } " from " { $snippet "u" } " component-wise with saturation." } ; + +HELP: vs* +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } +{ $description "Multiplies " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } ; + HELP: norm-sq { $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } } { $description "Computes the squared length of a mathematical vector." } ; @@ -120,3 +152,5 @@ HELP: set-axis { 2map v+ v- v* v/ } related-words { 2reduce v. } related-words + +{ vs+ vs- vs* } related-words diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index deda1dc505..3a1b2875a9 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -1,9 +1,11 @@ ! Copyright (C) 2005, 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: arrays kernel sequences math math.functions hints -math.order ; +USING: arrays alien.c-types kernel sequences math math.functions +hints math.order fry ; IN: math.vectors +GENERIC: element-type ( obj -- c-type ) + : vneg ( u -- v ) [ neg ] map ; : v+n ( u n -- v ) [ + ] curry map ; @@ -29,6 +31,13 @@ IN: math.vectors [ [ not ] 2dip pick [ + ] [ - ] if ] 2map nip ; +: 2saturate-map ( u v quot -- w ) + pick element-type '[ @ _ c-type-clamp ] 2map ; inline + +: vs+ ( u v -- w ) [ + ] 2saturate-map ; +: vs- ( u v -- w ) [ - ] 2saturate-map ; +: vs* ( u v -- w ) [ * ] 2saturate-map ; + : vfloor ( v -- _v_ ) [ floor ] map ; : vceiling ( v -- ^v^ ) [ ceiling ] map ; : vtruncate ( v -- -v- ) [ truncate ] map ; diff --git a/basis/specialized-arrays/specialized-arrays-tests.factor b/basis/specialized-arrays/specialized-arrays-tests.factor index 5d88f42d50..526312e0aa 100755 --- a/basis/specialized-arrays/specialized-arrays-tests.factor +++ b/basis/specialized-arrays/specialized-arrays-tests.factor @@ -4,7 +4,7 @@ specialized-arrays.private sequences alien.c-types accessors kernel arrays combinators compiler compiler.units classes.struct combinators.smart compiler.tree.debugger math libc destructors sequences.private multiline eval words vocabs namespaces -assocs prettyprint alien.data ; +assocs prettyprint alien.data math.vectors ; FROM: alien.c-types => float ; SPECIALIZED-ARRAY: int @@ -13,6 +13,9 @@ SPECIALIZED-ARRAY: ushort SPECIALIZED-ARRAY: char SPECIALIZED-ARRAY: uint SPECIALIZED-ARRAY: float +SPECIALIZED-ARRAY: ulonglong + +[ ulonglong ] [ ulonglong-array{ } element-type ] unit-test [ t ] [ { 1 2 3 } >int-array int-array? ] unit-test diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor index a64d052fd1..9692980858 100755 --- a/basis/specialized-arrays/specialized-arrays.factor +++ b/basis/specialized-arrays/specialized-arrays.factor @@ -1,10 +1,11 @@ ! Copyright (C) 2008, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien alien.c-types alien.data alien.parser assocs -byte-arrays classes compiler.units functors kernel lexer libc math -math.vectors.specialization namespaces parser prettyprint.custom -sequences sequences.private strings summary vocabs vocabs.loader -vocabs.parser vocabs.generated words fry combinators ; +USING: accessors alien alien.c-types alien.data alien.parser +assocs byte-arrays classes compiler.units functors kernel lexer +libc math math.vectors math.vectors.specialization namespaces +parser prettyprint.custom sequences sequences.private strings +summary vocabs vocabs.loader vocabs.parser vocabs.generated +words fry combinators present ; IN: specialized-arrays MIXIN: specialized-array @@ -53,14 +54,14 @@ TUPLE: A : ( alien len -- specialized-array ) A boa ; inline -: ( n -- specialized-array ) [ T ] keep ; inline +: ( n -- specialized-array ) [ \ T ] keep ; inline -: (A) ( n -- specialized-array ) [ T (underlying) ] keep ; inline +: (A) ( n -- specialized-array ) [ \ T (underlying) ] keep ; inline -: malloc-A ( len -- specialized-array ) [ T heap-size calloc ] keep ; inline +: malloc-A ( len -- specialized-array ) [ \ T heap-size calloc ] keep ; inline : byte-array>A ( byte-array -- specialized-array ) - dup length T heap-size /mod 0 = [ drop T bad-byte-array-length ] unless + dup length \ T heap-size /mod 0 = [ drop \ T bad-byte-array-length ] unless ; inline M: A clone [ underlying>> clone ] [ length>> ] bi ; inline @@ -81,12 +82,14 @@ M: A equal? over A instance? [ sequence= ] [ 2drop f ] if ; M: A resize [ - [ T heap-size * ] [ underlying>> ] bi* + [ \ T heap-size * ] [ underlying>> ] bi* resize-byte-array ] [ drop ] 2bi ; inline -M: A byte-length length T heap-size * ; inline +M: A byte-length length \ T heap-size * ; inline + +M: A element-type drop \ T ; inline M: A direct-array-syntax drop \ A@ ; @@ -116,15 +119,15 @@ M: word (underlying-type) "c-type" word-prop ; } cond ; : underlying-type-name ( c-type -- name ) - underlying-type dup word? [ name>> ] when ; + underlying-type present ; : specialized-array-vocab ( c-type -- vocab ) - "specialized-arrays.instances." prepend ; + present "specialized-arrays.instances." prepend ; PRIVATE> : define-array-vocab ( type -- vocab ) - underlying-type-name + underlying-type [ specialized-array-vocab ] [ '[ _ define-array ] ] bi generate-vocab ; diff --git a/vm/cpu-x86.S b/vm/cpu-x86.S index d229b2cb79..fe97fe2179 100644 --- a/vm/cpu-x86.S +++ b/vm/cpu-x86.S @@ -77,12 +77,12 @@ DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot)): DEF(bool,sse_version,(void)): mov $0x1,RETURN_REG cpuid - /* test $0x100000,%ecx + test $0x100000,%ecx jnz sse_42 test $0x80000,%ecx jnz sse_41 test $0x200,%ecx - jnz ssse_3 */ + jnz ssse_3 test $0x1,%ecx jnz sse_3 test $0x4000000,%edx From 3de9f4c33962f2fa1e4bf5e780fb4a5f57466144 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sun, 20 Sep 2009 23:16:14 -0500 Subject: [PATCH 05/15] mason.child: don't need command line switches for SSE version anymore --- extra/mason/child/child-tests.factor | 2 +- extra/mason/child/child.factor | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/extra/mason/child/child-tests.factor b/extra/mason/child/child-tests.factor index e8e1a9e0e9..2d5a7c6635 100644 --- a/extra/mason/child/child-tests.factor +++ b/extra/mason/child/child-tests.factor @@ -33,7 +33,7 @@ USING: mason.child mason.config tools.test namespaces io kernel sequences ; ] with-scope ] unit-test -[ { "./factor.com" "-i=boot.x86.32.image" "-no-user-init" "-sse-version=30" } ] [ +[ { "./factor.com" "-i=boot.x86.32.image" "-no-user-init" } ] [ [ "winnt" target-os set "x86.32" target-cpu set diff --git a/extra/mason/child/child.factor b/extra/mason/child/child.factor index b3ee6c2c76..193ac1e212 100755 --- a/extra/mason/child/child.factor +++ b/extra/mason/child/child.factor @@ -34,7 +34,6 @@ IN: mason.child factor-vm , "-i=" boot-image-name append , "-no-user-init" , - target-cpu get { "x86.32" "x86.64" } member? [ "-sse-version=30" , ] when ] { } make ; : boot ( -- ) From 603a560eafd14f3b71a9faeebb79970d9cfb2f92 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sun, 20 Sep 2009 23:24:30 -0500 Subject: [PATCH 06/15] cpu.x86: fix using list --- basis/cpu/x86/x86.factor | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index d40e03cdb6..4850403908 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -5,8 +5,7 @@ cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands cpu.architecture kernel kernel.private math memory namespaces make sequences words system layouts combinators math.order fry locals compiler.constants byte-arrays io macros quotations cpu.x86.features -cpu.x86.features.private compiler compiler.units init -compiler.constants vm byte-arrays +cpu.x86.features.private compiler compiler.units init vm compiler.cfg.registers compiler.cfg.instructions compiler.cfg.intrinsics From 74a72ce17c31b4df3f260154d65419f130f67078 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Tue, 22 Sep 2009 23:54:42 -0500 Subject: [PATCH 07/15] checksums.openssl: use correct API for creating EVP_MD_CTX, fixes crash on NetBSD --- basis/checksums/openssl/openssl.factor | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/basis/checksums/openssl/openssl.factor b/basis/checksums/openssl/openssl.factor index bc70230fd0..095ab38ace 100644 --- a/basis/checksums/openssl/openssl.factor +++ b/basis/checksums/openssl/openssl.factor @@ -1,4 +1,4 @@ -! Copyright (C) 2008 Slava Pestov +! copyright (C) 2008 Slava Pestov ! See http://factorcode.org/license.txt for BSD license. USING: accessors byte-arrays alien.c-types alien.data kernel continuations destructors sequences io openssl openssl.libcrypto @@ -23,10 +23,10 @@ TUPLE: evp-md-context < disposable handle ; : ( -- ctx ) evp-md-context new-disposable - EVP_MD_CTX dup EVP_MD_CTX_init >>handle ; + EVP_MD_CTX_create >>handle ; M: evp-md-context dispose* - handle>> EVP_MD_CTX_cleanup drop ; + handle>> EVP_MD_CTX_destroy ; : with-evp-md-context ( quot -- ) maybe-init-ssl [ ] dip with-disposal ; inline From 15ba7e299b333b63e349268a832ad369c62167fd Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 23 Sep 2009 01:05:19 -0500 Subject: [PATCH 08/15] math.vectors.simd: fix all tests --- basis/math/vectors/simd/simd-docs.factor | 28 ++++------- basis/math/vectors/simd/simd-tests.factor | 60 +++++++++++++++-------- 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index ef625ffff0..6d212f8e2b 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -1,5 +1,5 @@ -USING: help.markup help.syntax sequences math math.vectors -multiline kernel.private classes.tuple.private +USING: help.markup help.syntax help.vocabs sequences math +math.vectors multiline kernel.private classes.tuple.private math.vectors.simd.intrinsics cpu.architecture ; IN: math.vectors.simd @@ -159,22 +159,8 @@ ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives" } "The compiler converts " { $link "math-vectors" } " into SIMD primitives automatically in cases where it is safe; this means that the input types are known to be SIMD vectors, and the CPU supports SIMD." $nl -"It is best to avoid calling these primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "." -{ $subsection (simd-v+) } -{ $subsection (simd-vs+) } -{ $subsection (simd-v+-) } -{ $subsection (simd-v-) } -{ $subsection (simd-vs-) } -{ $subsection (simd-v*) } -{ $subsection (simd-vs*) } -{ $subsection (simd-v/) } -{ $subsection (simd-vmin) } -{ $subsection (simd-vmax) } -{ $subsection (simd-vsqrt) } -{ $subsection (simd-sum) } -{ $subsection (simd-broadcast) } -{ $subsection (simd-gather-2) } -{ $subsection (simd-gather-4) } +"It is best to avoid calling SIMD primitives directly. To write efficient high-level code that compiles down to primitives and avoids memory allocation, see " { $link "math.vectors.simd.efficiency" } "." +$nl "There are two primitives which are used to implement accessing SIMD vector fields of " { $link "classes.struct" } ":" { $subsection alien-vector } { $subsection set-alien-vector } @@ -185,12 +171,18 @@ ARTICLE: "math.vectors.simd.alien" "SIMD data in struct classes" $nl "Only SIMD struct fields are allowed at the moment; passing SIMD data as function parameters is not yet supported." ; +ARTICLE: "math.vectors.simd.accuracy" "Numerical accuracy of SIMD primitives" +"No guarantees are made that " { $vocab-link "math.vectors.simd" } " words will give identical results on different SSE versions, or between the hardware intrinsics and the software fallbacks." +$nl +"In particular, horizontal operations on " { $snippet "float-4" } " and " { $snippet "float-8" } " are affected by this. They are computed with lower precision in intrinsics than the software fallback. Horizontal opeartions include anything involving adding together the components of a vector, such as " { $link sum } " or " { $link normalize } "." ; + ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)" "The " { $vocab-link "math.vectors.simd" } " vocabulary extends the " { $vocab-link "math.vectors" } " vocabulary to support efficient vector arithmetic on small, fixed-size vectors." { $subsection "math.vectors.simd.intro" } { $subsection "math.vectors.simd.types" } { $subsection "math.vectors.simd.words" } { $subsection "math.vectors.simd.support" } +{ $subsection "math.vectors.simd.accuracy" } { $subsection "math.vectors.simd.efficiency" } { $subsection "math.vectors.simd.alien" } { $subsection "math.vectors.simd.intrinsics" } ; diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 39afe3cb03..007ec338cd 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -74,17 +74,18 @@ CONSTANT: simd-classes : boa-ctors ( -- seq ) simd-classes [ [ name>> "-boa" append ] [ vocabulary>> ] bi lookup ] map ; -: check-optimizer ( seq inputs quot -- ) - [ +: check-optimizer ( seq inputs quot eq-quot -- ) + '[ + @ [ "print-mr" get [ nip test-mr mr. ] [ 2drop ] if ] [ [ call ] dip call ] - [ [ call ] dip compile-call ] 2tri = not - ] compose filter ; inline + [ [ call ] dip compile-call ] 2tri @ not + ] filter ; inline "== Checking -new constructors" print [ { } ] [ - simd-classes [ [ [ ] ] dip '[ _ new ] ] check-optimizer + simd-classes [ [ [ ] ] dip '[ _ new ] ] [ = ] check-optimizer ] unit-test [ { } ] [ @@ -96,7 +97,7 @@ CONSTANT: simd-classes [ { } ] [ with-ctors [ [ 1000 random '[ _ ] ] dip '[ { fixnum } declare _ execute ] - ] check-optimizer + ] [ = ] check-optimizer ] unit-test "== Checking -boa constructors" print @@ -107,7 +108,7 @@ CONSTANT: simd-classes [ nip [ 1000 random ] [ ] replicate-as ] [ fixnum swap '[ _ declare _ execute ] ] 2bi - ] check-optimizer + ] [ = ] check-optimizer ] unit-test "== Checking vector operations" print @@ -133,24 +134,44 @@ CONSTANT: simd-classes ] bi word '[ _ declare _ execute ] ; -: ops-to-check ( elt-class -- alist ) - [ vector-words >alist ] dip float = [ - [ drop { n/v v/n v/ normalize } member? not ] assoc-filter - ] unless ; +: remove-float-words ( alist -- alist' ) + [ drop { n/v v/n v/ normalize } member? not ] assoc-filter ; -: check-vector-ops ( class elt-class -- ) - [ nip ops-to-check ] 2keep - '[ first2 inputs _ _ check-vector-op ] check-optimizer ; inline +: ops-to-check ( elt-class -- alist ) + [ vector-words >alist ] dip + float = [ remove-float-words ] unless ; + +: check-vector-ops ( class elt-class compare-quot -- ) + [ + [ nip ops-to-check ] 2keep + '[ first2 inputs _ _ check-vector-op ] + ] dip check-optimizer ; inline + +: approx= ( x y -- ? ) + { + { [ 2dup [ float? ] both? ] [ -1.e8 ~ ] } + { [ 2dup [ sequence? ] both? ] [ + [ + { + { [ 2dup [ fp-nan? ] both? ] [ 2drop t ] } + { [ 2dup [ fp-nan? ] either? not ] [ -1.e8 ~ ] } + } cond + ] 2all? + ] } + } cond ; : simd-classes&reps ( -- alist ) simd-classes [ - dup name>> [ "float" head? ] [ "double" head? ] bi or - float fixnum ? - ] { } map>assoc ; + { + { [ dup name>> "float" head? ] [ float [ approx= ] ] } + { [ dup name>> "double" tail? ] [ float [ = ] ] } + [ fixnum [ = ] ] + } cond 3array + ] map ; simd-classes&reps [ - [ [ { } ] ] 2dip '[ _ _ check-vector-ops ] unit-test -] assoc-each + [ [ { } ] ] dip first3 '[ _ _ _ check-vector-ops ] unit-test +] each ! Other regressions [ 8000000 ] [ @@ -158,7 +179,6 @@ simd-classes&reps [ [ { int-8 } declare dup [ * ] [ + ] 2map-reduce ] compile-call ] unit-test - ! Vector alien intrinsics [ float-4{ 1 2 3 4 } ] [ [ From 43fa252af50997095d1a1ee492fc2fc92d330ada Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 23 Sep 2009 02:46:54 -0500 Subject: [PATCH 09/15] math.vectors.simd: new operations: vabs vsqrt vbitand vbitor vbitxor --- .../cfg/instructions/instructions.factor | 26 ++++- .../compiler/cfg/intrinsics/intrinsics.factor | 6 +- .../cfg/two-operand/two-operand.factor | 5 +- basis/compiler/codegen/codegen.factor | 4 + .../tree/propagation/simd/simd.factor | 5 + basis/cpu/architecture/architecture.factor | 8 ++ basis/cpu/x86/x86.factor | 100 +++++++++++++++--- .../math/vectors/simd/functor/functor.factor | 81 ++++++-------- .../vectors/simd/intrinsics/intrinsics.factor | 54 +++++++--- basis/math/vectors/simd/simd-tests.factor | 4 +- .../specialization/specialization.factor | 5 + basis/math/vectors/vectors-docs.factor | 9 +- basis/math/vectors/vectors.factor | 27 ++++- 13 files changed, 252 insertions(+), 82 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 87e981f362..874093ed40 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -350,14 +350,34 @@ def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##horizontal-add-vector +def: dst/scalar-rep +use: src +literal: rep ; + +PURE-INSN: ##abs-vector +def: dst +use: src +literal: rep ; + PURE-INSN: ##sqrt-vector def: dst use: src literal: rep ; -PURE-INSN: ##horizontal-add-vector -def: dst/scalar-rep -use: src +PURE-INSN: ##and-vector +def: dst +use: src1 src2 +literal: rep ; + +PURE-INSN: ##or-vector +def: dst +use: src1 src2 +literal: rep ; + +PURE-INSN: ##xor-vector +def: dst +use: src1 src2 literal: rep ; ! Boxing and unboxing aliens diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index b9835827fa..d2f158f06d 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -164,7 +164,11 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-v/) [ [ ^^div-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmin) [ [ ^^min-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vmax) [ [ ^^max-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vabs) [ [ ^^abs-vector ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vsqrt) [ [ ^^sqrt-vector ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vbitand) [ [ ^^and-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-broadcast) [ [ ^^broadcast-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } diff --git a/basis/compiler/cfg/two-operand/two-operand.factor b/basis/compiler/cfg/two-operand/two-operand.factor index 1186e6b41f..45d248f8f4 100644 --- a/basis/compiler/cfg/two-operand/two-operand.factor +++ b/basis/compiler/cfg/two-operand/two-operand.factor @@ -55,7 +55,10 @@ UNION: two-operand-insn ##saturated-mul-vector ##div-vector ##min-vector - ##max-vector ; + ##max-vector + ##and-vector + ##or-vector + ##xor-vector ; GENERIC: convert-two-operand* ( insn -- ) diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index e7a2548eff..43d11b5d4f 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -180,6 +180,10 @@ CODEGEN: ##min-vector %min-vector CODEGEN: ##max-vector %max-vector CODEGEN: ##sqrt-vector %sqrt-vector CODEGEN: ##horizontal-add-vector %horizontal-add-vector +CODEGEN: ##abs-vector %abs-vector +CODEGEN: ##and-vector %and-vector +CODEGEN: ##or-vector %or-vector +CODEGEN: ##xor-vector %xor-vector CODEGEN: ##box-alien %box-alien CODEGEN: ##box-displaced-alien %box-displaced-alien CODEGEN: ##unbox-alien %unbox-alien diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index db39985c94..fadb382398 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -13,7 +13,12 @@ IN: compiler.tree.propagation.simd (simd-v/) (simd-vmin) (simd-vmax) + (simd-sum) + (simd-vabs) (simd-vsqrt) + (simd-vbitand) + (simd-vbitor) + (simd-vbitxor) (simd-broadcast) (simd-gather-2) (simd-gather-4) diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 6bc78836cd..2dbe724f0a 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -192,6 +192,10 @@ HOOK: %min-vector cpu ( dst src1 src2 rep -- ) HOOK: %max-vector cpu ( dst src1 src2 rep -- ) HOOK: %sqrt-vector cpu ( dst src rep -- ) HOOK: %horizontal-add-vector cpu ( dst src rep -- ) +HOOK: %abs-vector cpu ( dst src rep -- ) +HOOK: %and-vector cpu ( dst src1 src2 rep -- ) +HOOK: %or-vector cpu ( dst src1 src2 rep -- ) +HOOK: %xor-vector cpu ( dst src1 src2 rep -- ) HOOK: %broadcast-vector-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps ) @@ -208,6 +212,10 @@ HOOK: %min-vector-reps cpu ( -- reps ) HOOK: %max-vector-reps cpu ( -- reps ) HOOK: %sqrt-vector-reps cpu ( -- reps ) HOOK: %horizontal-add-vector-reps cpu ( -- reps ) +HOOK: %abs-vector-reps cpu ( -- reps ) +HOOK: %and-vector-reps cpu ( -- reps ) +HOOK: %or-vector-reps cpu ( -- reps ) +HOOK: %xor-vector-reps cpu ( -- reps ) HOOK: %unbox-alien cpu ( dst src -- ) HOOK: %unbox-any-c-ptr cpu ( dst src temp -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 4850403908..1a96e93c63 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -410,30 +410,63 @@ M: x86 %div-vector-reps M: x86 %min-vector ( dst src1 src2 rep -- ) { - { float-4-rep [ MINPS ] } - { double-2-rep [ MINPD ] } + { char-16-rep [ PMINSB ] } { uchar-16-rep [ PMINUB ] } { short-8-rep [ PMINSW ] } + { ushort-8-rep [ PMINUW ] } + { int-4-rep [ PMINSD ] } + { uint-4-rep [ PMINUD ] } + { float-4-rep [ MINPS ] } + { double-2-rep [ MINPD ] } } case drop ; M: x86 %min-vector-reps { { sse? { float-4-rep } } - { sse2? { double-2-rep short-8-rep uchar-16-rep } } + { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } } + { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; M: x86 %max-vector ( dst src1 src2 rep -- ) { - { float-4-rep [ MAXPS ] } - { double-2-rep [ MAXPD ] } + { char-16-rep [ PMAXSB ] } { uchar-16-rep [ PMAXUB ] } { short-8-rep [ PMAXSW ] } + { ushort-8-rep [ PMAXUW ] } + { int-4-rep [ PMAXSD ] } + { uint-4-rep [ PMAXUD ] } + { float-4-rep [ MAXPS ] } + { double-2-rep [ MAXPD ] } } case drop ; M: x86 %max-vector-reps { { sse? { float-4-rep } } - { sse2? { double-2-rep short-8-rep uchar-16-rep } } + { sse2? { uchar-16-rep short-8-rep double-2-rep short-8-rep uchar-16-rep } } + { sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + +M: x86 %horizontal-add-vector ( dst src rep -- ) + { + { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] } + { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] } + } case ; + +M: x86 %horizontal-add-vector-reps + { + { sse3? { float-4-rep double-2-rep } } + } available-reps ; + +M: x86 %abs-vector ( dst src rep -- ) + { + { char-16-rep [ PABSB ] } + { short-8-rep [ PABSW ] } + { int-4-rep [ PABSD ] } + } case ; + +M: x86 %abs-vector-reps + { + { ssse3? { char-16-rep short-8-rep int-4-rep } } } available-reps ; M: x86 %sqrt-vector ( dst src rep -- ) @@ -448,15 +481,58 @@ M: x86 %sqrt-vector-reps { sse2? { double-2-rep } } } available-reps ; -M: x86 %horizontal-add-vector ( dst src rep -- ) +M: x86 %and-vector ( dst src1 src2 rep -- ) { - { float-4-rep [ [ MOVAPS ] [ HADDPS ] [ HADDPS ] 2tri ] } - { double-2-rep [ [ MOVAPD ] [ HADDPD ] 2bi ] } - } case ; + { float-4-rep [ ANDPS ] } + { double-2-rep [ ANDPD ] } + { char-16-rep [ PAND ] } + { uchar-16-rep [ PAND ] } + { short-8-rep [ PAND ] } + { ushort-8-rep [ PAND ] } + { int-4-rep [ PAND ] } + { uint-4-rep [ PAND ] } + } case drop ; -M: x86 %horizontal-add-vector-reps +M: x86 %and-vector-reps { - { sse3? { float-4-rep double-2-rep } } + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + +M: x86 %or-vector ( dst src1 src2 rep -- ) + { + { float-4-rep [ ORPS ] } + { double-2-rep [ ORPD ] } + { char-16-rep [ POR ] } + { uchar-16-rep [ POR ] } + { short-8-rep [ POR ] } + { ushort-8-rep [ POR ] } + { int-4-rep [ POR ] } + { uint-4-rep [ POR ] } + } case drop ; + +M: x86 %or-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + +M: x86 %xor-vector ( dst src1 src2 rep -- ) + { + { float-4-rep [ XORPS ] } + { double-2-rep [ XORPD ] } + { char-16-rep [ PXOR ] } + { uchar-16-rep [ PXOR ] } + { short-8-rep [ PXOR ] } + { ushort-8-rep [ PXOR ] } + { int-4-rep [ PXOR ] } + { uint-4-rep [ PXOR ] } + } case drop ; + +M: x86 %xor-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; M: x86 %unbox-alien ( dst src -- ) diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 7d84b18225..e934a641c4 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -4,7 +4,8 @@ USING: accessors alien.c-types assocs byte-arrays classes effects fry functors generalizations kernel literals locals math math.functions math.vectors math.vectors.simd.intrinsics math.vectors.specialization parser prettyprint.custom sequences -sequences.private strings words definitions macros cpu.architecture ; +sequences.private strings words definitions macros cpu.architecture +namespaces arrays quotations ; QUALIFIED-WITH: math m IN: math.vectors.simd.functor @@ -38,24 +39,19 @@ MACRO: simd-boa ( rep class -- simd-array ) { "simd-vector" } ; : supported-simd-ops ( assoc rep -- assoc' ) - [ - { - { v+ (simd-v+) } - { vs+ (simd-vs+) } - { v+- (simd-v+-) } - { v- (simd-v-) } - { vs- (simd-vs-) } - { v* (simd-v*) } - { vs* (simd-vs*) } - { v/ (simd-v/) } - { vmin (simd-vmin) } - { vmax (simd-vmax) } - { sum (simd-sum) } - } - ] dip + [ simd-ops get ] dip '[ nip _ swap supported-simd-op? ] assoc-filter '[ drop _ key? ] assoc-filter ; +ERROR: bad-schema schema ; + +: low-level-ops ( box-quot: ( inputs... simd-op -- outputs... ) -- alist ) + [ simd-ops get ] dip '[ + 1quotation + over word-schema _ ?at [ bad-schema ] unless + [ ] 2sequence + ] assoc-map ; + :: high-level-ops ( ctor elt-class -- assoc ) ! Some SIMD operations are defined in terms of others. { @@ -82,11 +78,17 @@ MACRO: simd-boa ( rep class -- simd-array ) } append ] when ; -:: simd-vector-words ( class ctor rep assoc -- ) +:: simd-vector-words ( class ctor rep vv->v v->v v->n -- ) rep rep-component-type c-type-boxed-class :> elt-class class elt-class - assoc rep supported-simd-ops + { + { { +vector+ +vector+ -> +vector+ } vv->v } + { { +vector+ -> +vector+ } v->v } + { { +vector+ -> +scalar+ } v->n } + { { +vector+ -> +nonnegative+ } v->n } + } low-level-ops + rep supported-simd-ops ctor elt-class high-level-ops assoc-union specialize-vector-words ; @@ -116,6 +118,7 @@ SET-NTH [ T dup c-setter array-accessor ] A-rep [ A name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op +A-v->v-op DEFINES-PRIVATE ${A}-v->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op WHERE @@ -172,23 +175,13 @@ INSTANCE: A sequence : A-vv->v-op ( v1 v2 quot -- v3 ) [ [ underlying>> ] bi@ A-rep ] dip call \ A boa ; inline +: A-v->v-op ( v1 quot -- v2 ) + [ underlying>> A-rep ] dip call \ A boa ; inline + : A-v->n-op ( v quot -- n ) [ underlying>> A-rep ] dip call ; inline -\ A \ A-with \ A-rep H{ - { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } - { vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] } - { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } - { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } - { vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] } - { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } - { vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] } - { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } - { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } - { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] } - { sum [ [ (simd-sum) ] \ A-v->n-op execute ] } -} simd-vector-words - +\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words \ A \ A-rep define-simd-128-type PRIVATE> @@ -237,6 +230,7 @@ A-deref DEFINES-PRIVATE ${A}-deref A-rep [ A/2 name>> "-rep" append "cpu.architecture" lookup ] A-vv->v-op DEFINES-PRIVATE ${A}-vv->v-op +A-v->v-op DEFINES-PRIVATE ${A}-v->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op WHERE @@ -302,24 +296,15 @@ INSTANCE: A sequence [ [ [ underlying2>> ] bi@ A-rep ] dip call ] 3bi \ A boa ; inline -: A-v->n-op ( v1 combine-quot reduce-quot -- v2 ) - [ [ [ underlying1>> ] [ underlying2>> ] bi A-rep ] dip call A-rep ] - dip call ; inline +: A-v->v-op ( v1 combine-quot -- v2 ) + [ [ underlying1>> A-rep ] dip call ] + [ [ underlying2>> A-rep ] dip call ] 2bi + \ A boa ; inline -\ A \ A-with \ A-rep H{ - { v+ [ [ (simd-v+) ] \ A-vv->v-op execute ] } - { vs+ [ [ (simd-vs+) ] \ A-vv->v-op execute ] } - { v- [ [ (simd-v-) ] \ A-vv->v-op execute ] } - { vs- [ [ (simd-vs-) ] \ A-vv->v-op execute ] } - { v+- [ [ (simd-v+-) ] \ A-vv->v-op execute ] } - { v* [ [ (simd-v*) ] \ A-vv->v-op execute ] } - { vs* [ [ (simd-vs*) ] \ A-vv->v-op execute ] } - { v/ [ [ (simd-v/) ] \ A-vv->v-op execute ] } - { vmin [ [ (simd-vmin) ] \ A-vv->v-op execute ] } - { vmax [ [ (simd-vmax) ] \ A-vv->v-op execute ] } - { sum [ [ (simd-v+) ] [ (simd-sum) ] \ A-v->n-op execute ] } -} simd-vector-words +: A-v->n-op ( v1 combine-quot -- v2 ) + [ [ underlying1>> ] [ underlying2>> ] bi A-rep (simd-v+) A-rep ] dip call ; inline +\ A \ A-with \ A-rep \ A-vv->v-op \ A-v->v-op \ A-v->n-op simd-vector-words \ A \ A-rep define-simd-256-type ;FUNCTOR diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 63214f7da6..2c1f76cfe1 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -2,23 +2,47 @@ ! See http://factorcode.org/license.txt for BSD license. USING: alien alien.c-types alien.data assocs combinators cpu.architecture fry generalizations kernel libc macros math -sequences ; +sequences effects accessors namespaces lexer parser vocabs.parser +words arrays math.vectors ; IN: math.vectors.simd.intrinsics ERROR: bad-simd-call ; -: (simd-v+) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-v-) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-v+-) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-vs+) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-vs-) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-vs*) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-v*) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-v/) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-vmin) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-vmax) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-vsqrt) ( v1 v2 rep -- v3 ) bad-simd-call ; -: (simd-sum) ( v1 rep -- v2 ) bad-simd-call ; +<< + +: simd-effect ( word -- effect ) + stack-effect [ in>> "rep" suffix ] [ out>> ] bi ; + +SYMBOL: simd-ops + +V{ } clone simd-ops set-global + +SYNTAX: SIMD-OP: + scan-word dup name>> "(simd-" ")" surround create-in + [ nip [ bad-simd-call ] define ] + [ [ simd-effect ] dip set-stack-effect ] + [ 2array simd-ops get push ] + 2tri ; + +>> + +SIMD-OP: v+ +SIMD-OP: v- +SIMD-OP: v+- +SIMD-OP: vs+ +SIMD-OP: vs- +SIMD-OP: vs* +SIMD-OP: v* +SIMD-OP: v/ +SIMD-OP: vmin +SIMD-OP: vmax +SIMD-OP: vsqrt +SIMD-OP: sum +SIMD-OP: vabs +SIMD-OP: vbitand +SIMD-OP: vbitor +SIMD-OP: vbitxor + : (simd-broadcast) ( x rep -- v ) bad-simd-call ; : (simd-gather-2) ( a b rep -- v ) bad-simd-call ; : (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ; @@ -82,6 +106,10 @@ M: vector-rep supported-simd-op? { \ (simd-vmax) [ %max-vector-reps ] } { \ (simd-vsqrt) [ %sqrt-vector-reps ] } { \ (simd-sum) [ %horizontal-add-vector-reps ] } + { \ (simd-vabs) [ %abs-vector-reps ] } + { \ (simd-vbitand) [ %and-vector-reps ] } + { \ (simd-vbitor) [ %or-vector-reps ] } + { \ (simd-vbitxor) [ %xor-vector-reps ] } { \ (simd-broadcast) [ %broadcast-vector-reps ] } { \ (simd-gather-2) [ %gather-vector-2-reps ] } { \ (simd-gather-4) [ %gather-vector-4-reps ] } diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 007ec338cd..db8597fc9d 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -135,7 +135,7 @@ CONSTANT: simd-classes word '[ _ declare _ execute ] ; : remove-float-words ( alist -- alist' ) - [ drop { n/v v/n v/ normalize } member? not ] assoc-filter ; + [ drop { vsqrt n/v v/n v/ normalize } member? not ] assoc-filter ; : ops-to-check ( elt-class -- alist ) [ vector-words >alist ] dip @@ -242,3 +242,5 @@ STRUCT: simd-struct { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave ] compile-call ] unit-test + +[ ] [ char-16 new 1array stack. ] unit-test diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 2fb36d428a..bf2dac29d6 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -72,6 +72,11 @@ H{ { vneg { +vector+ -> +vector+ } } { vtruncate { +vector+ -> +vector+ } } { sum { +vector+ -> +scalar+ } } + { vabs { +vector+ -> +vector+ } } + { vsqrt { +vector+ -> +vector+ } } + { vbitand { +vector+ +vector+ -> +vector+ } } + { vbitor { +vector+ +vector+ -> +vector+ } } + { vbitxor { +vector+ +vector+ -> +vector+ } } } PREDICATE: vector-word < word vector-words key? ; diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index ce635b6d60..67ac27400e 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -30,7 +30,10 @@ $nl "Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:" { $subsection vs+ } { $subsection vs- } -{ $subsection vs* } ; +{ $subsection vs* } +"Comparing vectors:" +{ $subsection distance } +{ $subsection v~ } ; ABOUT: "math-vectors" @@ -144,6 +147,10 @@ HELP: normalize { $values { "u" "a sequence of numbers, not all zero" } { "v" "a sequence of numbers" } } { $description "Outputs a vector with the same direction as " { $snippet "u" } " but length 1." } ; +HELP: distance +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "x" "a non-negative real number" } } +{ $description "Outputs the Euclidean distance between two vectors." } ; + HELP: set-axis { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "axis" "a sequence of 0/1" } { "w" "a sequence of numbers" } } { $description "Using " { $snippet "w" } " as a template, creates a new sequence containing corresponding elements from " { $snippet "u" } " in place of 0, and corresponding elements from " { $snippet "v" } " in place of 1." } diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index 3a1b2875a9..eb97fba663 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -1,7 +1,8 @@ -! Copyright (C) 2005, 2008 Slava Pestov. +! Copyright (C) 2005, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: arrays alien.c-types kernel sequences math math.functions -hints math.order fry ; +hints math.order fry combinators ; +QUALIFIED-WITH: alien.c-types c IN: math.vectors GENERIC: element-type ( obj -- c-type ) @@ -31,13 +32,35 @@ GENERIC: element-type ( obj -- c-type ) [ [ not ] 2dip pick [ + ] [ - ] if ] 2map nip ; + + : vs+ ( u v -- w ) [ + ] 2saturate-map ; : vs- ( u v -- w ) [ - ] 2saturate-map ; : vs* ( u v -- w ) [ * ] 2saturate-map ; +: vabs ( u -- v ) [ abs ] map ; +: vsqrt ( u -- v ) [ sqrt ] map ; + +bits ] bi@ ] dip call bits>double ] } + { c:float [ [ [ float>bits ] bi@ ] dip call bits>float ] } + [ drop call ] + } case ; inline + +PRIVATE> + +: vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ; +: vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ; +: vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ; + : vfloor ( v -- _v_ ) [ floor ] map ; : vceiling ( v -- ^v^ ) [ ceiling ] map ; : vtruncate ( v -- -v- ) [ truncate ] map ; From 7d39499d86dec087d0f3c5bd7e77159959b31529 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 23 Sep 2009 02:47:56 -0500 Subject: [PATCH 10/15] compiler.cfg.ssa.cssa: correctly handle overflowing fixnum instructions --- basis/compiler/cfg/ssa/cssa/cssa.factor | 18 ++++++++++++++---- basis/compiler/tests/codegen.factor | 20 +++++++++++++++++++- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/basis/compiler/cfg/ssa/cssa/cssa.factor b/basis/compiler/cfg/ssa/cssa/cssa.factor index 14287e900f..d58cebac65 100644 --- a/basis/compiler/cfg/ssa/cssa/cssa.factor +++ b/basis/compiler/cfg/ssa/cssa/cssa.factor @@ -1,8 +1,9 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors assocs kernel locals fry +USING: accessors assocs kernel locals fry sequences cpu.architecture compiler.cfg.rpo +compiler.cfg.def-use compiler.cfg.utilities compiler.cfg.registers compiler.cfg.instructions @@ -13,10 +14,19 @@ IN: compiler.cfg.ssa.cssa ! selection, so it must keep track of representations when introducing ! new values. +: insert-copy? ( bb vreg -- ? ) + ! If the last instruction defines a value (which means it is + ! ##fixnum-add, ##fixnum-sub or ##fixnum-mul) then we don't + ! need to insert a copy since in fact doing so will result + ! in incorrect code. + [ instructions>> last defs-vreg ] dip eq? not ; + :: insert-copy ( bb src rep -- bb dst ) - rep next-vreg-rep :> dst - bb [ dst src rep src rep-of emit-conversion ] add-instructions - bb dst ; + bb src insert-copy? [ + rep next-vreg-rep :> dst + bb [ dst src rep src rep-of emit-conversion ] add-instructions + bb dst + ] [ bb src ] if ; : convert-phi ( ##phi -- ) dup dst>> rep-of '[ [ _ insert-copy ] assoc-map ] change-inputs drop ; diff --git a/basis/compiler/tests/codegen.factor b/basis/compiler/tests/codegen.factor index 14ed2294c7..3dbde076a6 100644 --- a/basis/compiler/tests/codegen.factor +++ b/basis/compiler/tests/codegen.factor @@ -3,7 +3,7 @@ math hashtables.private math.private namespaces sequences tools.test namespaces.private slots.private sequences.private byte-arrays alien alien.accessors layouts words definitions compiler.units io combinators vectors grouping make alien.c-types combinators.short-circuit -math.order math.libm math.parser ; +math.order math.libm math.parser alien.c-types ; FROM: math => float ; QUALIFIED: namespaces.private IN: compiler.tests.codegen @@ -431,3 +431,21 @@ cell 4 = [ ] curry each-integer ] compile-call ] unit-test + +TUPLE: myseq { underlying1 byte-array read-only } { underlying2 byte-array read-only } ; + +[ 2 ] [ + little-endian? + T{ myseq f B{ 1 0 0 0 } B{ 1 0 0 0 } } + T{ myseq f B{ 0 0 0 1 } B{ 0 0 0 1 } } ? + [ + { myseq } declare + [ 0 2 ] dip dup + [ + [ + over 1 < [ underlying1>> ] [ [ 1 - ] dip underlying2>> ] if + swap 4 * >fixnum alien-signed-4 + ] bi-curry@ bi * + + ] 2curry each-integer + ] compile-call +] unit-test From 3d4ce094d282272265e76a767d89fc659e8c260c Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 23 Sep 2009 02:48:23 -0500 Subject: [PATCH 11/15] prettyprint.backend: don't bomb if printing length-limited sequence which doesn't like being resized itself. Instead convert to array of elements first --- basis/prettyprint/backend/backend.factor | 1 + 1 file changed, 1 insertion(+) diff --git a/basis/prettyprint/backend/backend.factor b/basis/prettyprint/backend/backend.factor index cba40bbff1..fb47c50fbe 100644 --- a/basis/prettyprint/backend/backend.factor +++ b/basis/prettyprint/backend/backend.factor @@ -173,6 +173,7 @@ M: tuple pprint* ] when ; : pprint-elements ( seq -- ) + >array do-length-limit [ [ pprint* ] each ] dip [ "~" swap number>string " more~" 3append text ] when* ; From e1efb189a44f131fba0809bc1dcd4e08928df7d6 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 23 Sep 2009 03:08:45 -0500 Subject: [PATCH 12/15] math and math.vectors: improved documentation --- basis/math/vectors/vectors-docs.factor | 96 +++++++++++++++++++++++--- basis/math/vectors/vectors.factor | 10 +-- core/math/math-docs.factor | 11 ++- 3 files changed, 98 insertions(+), 19 deletions(-) diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index 67ac27400e..3790e38d55 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -1,4 +1,4 @@ -USING: help.markup help.syntax math sequences ; +USING: help.markup help.syntax math math.functions sequences ; IN: math.vectors ARTICLE: "math-vectors" "Vector arithmetic" @@ -14,26 +14,46 @@ $nl { $subsection n+v } { $subsection v-n } { $subsection n-v } -"Combining two vectors to form another vector with " { $link 2map } ":" +"Vector unary operations:" +{ $subsection vneg } +{ $subsection vabs } +{ $subsection vsqrt } +{ $subsection vfloor } +{ $subsection vceiling } +{ $subsection vtruncate } +"Vector/vector binary operations:" { $subsection v+ } { $subsection v- } { $subsection v+- } { $subsection v* } { $subsection v/ } +"Saturated arithmetic (only on " { $link "specialized-arrays" } "):" +{ $subsection vs+ } +{ $subsection vs- } +{ $subsection vs* } +"Comparisons:" { $subsection vmax } { $subsection vmin } +"Bitwise operations:" +{ $subsection vbitand } +{ $subsection vbitor } +{ $subsection vbitxor } "Inner product and norm:" { $subsection v. } { $subsection norm } { $subsection norm-sq } { $subsection normalize } -"Saturated arithmetic may be performed on " { $link "specialized-arrays" } "; the results are clamped to the minimum and maximum bounds of the array element type, instead of wrapping around:" -{ $subsection vs+ } -{ $subsection vs- } -{ $subsection vs* } "Comparing vectors:" { $subsection distance } -{ $subsection v~ } ; +{ $subsection v~ } +"Other functions:" +{ $subsection vsupremum } +{ $subsection vinfimum } +{ $subsection trilerp } +{ $subsection bilerp } +{ $subsection vlerp } +{ $subsection vnlerp } +{ $subsection vbilerp } ; ABOUT: "math-vectors" @@ -41,6 +61,43 @@ HELP: vneg { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } } { $description "Negates each element of " { $snippet "u" } "." } ; +HELP: vabs +{ $values { "u" "a sequence of numbers" } { "v" "a sequence of non-negative real numbers" } } +{ $description "Takes the absolute value of each element of " { $snippet "u" } "." } ; + +HELP: vsqrt +{ $values { "u" "a sequence of non-negative real numbers" } { "v" "a sequence of non-negative real numbers" } } +{ $description "Takes the square root of each element of " { $snippet "u" } "." } +{ $warning "For performance reasons, this does not work with negative inputs, unlike " { $link sqrt } "." } ; + +HELP: vfloor +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } } +{ $description "Takes the " { $link floor } " of each element of " { $snippet "u" } "." } ; + +HELP: vceiling +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } } +{ $description "Takes the " { $link ceiling } " of each element of " { $snippet "u" } "." } ; + +HELP: vtruncate +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } } +{ $description "Truncates each element of " { $snippet "u" } "." } ; + +HELP: n+v +{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } } +{ $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ; + +HELP: v+n +{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } } +{ $description "Adds " { $snippet "n" } " to each element of " { $snippet "u" } "." } ; + +HELP: n-v +{ $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } } +{ $description "Subtracts each element of " { $snippet "u" } " from " { $snippet "n" } "." } ; + +HELP: v-n +{ $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } } +{ $description "Subtracts " { $snippet "n" } " from each element of " { $snippet "u" } "." } ; + HELP: n*v { $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } } { $description "Multiplies each element of " { $snippet "u" } " by " { $snippet "n" } "." } ; @@ -51,11 +108,13 @@ HELP: v*n HELP: n/v { $values { "n" "a number" } { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } } -{ $description "Divides " { $snippet "n" } " by each element of " { $snippet "u" } "." } ; +{ $description "Divides " { $snippet "n" } " by each element of " { $snippet "u" } "." } +{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ; HELP: v/n { $values { "u" "a sequence of numbers" } { "n" "a number" } { "v" "a sequence of numbers" } } -{ $description "Divides each element of " { $snippet "u" } " by " { $snippet "n" } "." } ; +{ $description "Divides each element of " { $snippet "u" } " by " { $snippet "n" } "." } +{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ; HELP: v+ { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } @@ -87,7 +146,7 @@ HELP: v* HELP: v/ { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } { $description "Divides " { $snippet "u" } " by " { $snippet "v" } " component-wise." } -{ $errors "Throws an error if an integer division by zero occurs." } ; +{ $errors "May throw an error if a division by zero occurs; see " { $link "division-by-zero" } "." } ; HELP: vmax { $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } @@ -104,7 +163,7 @@ HELP: v. { $description "Computes the real-valued dot product." } { $notes "This word can also take complex number sequences as input, however mathematically it will compute the wrong result. The complex-valued dot product is defined differently:" - { $snippet "0 [ conjugate * + ] 2reduce" } + { $code "0 [ conjugate * + ] 2reduce" } } ; HELP: vs+ @@ -135,6 +194,21 @@ HELP: vs* { $values { "u" "a sequence of numbers" } { "v" "a sequence of numbers" } { "w" "a sequence of numbers" } } { $description "Multiplies " { $snippet "u" } " and " { $snippet "v" } " component-wise with saturation." } ; +HELP: vbitand +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } +{ $description "Takes the bitwise and of " { $snippet "u" } " and " { $snippet "v" } " component-wise." } +{ $notes "Unlike " { $link bitand } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ; + +HELP: vbitor +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } +{ $description "Takes the bitwise or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." } +{ $notes "Unlike " { $link bitor } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ; + +HELP: vbitxor +{ $values { "u" "a sequence of real numbers" } { "v" "a sequence of real numbers" } { "w" "a sequence of real numbers" } } +{ $description "Takes the bitwise exclusive or of " { $snippet "u" } " and " { $snippet "v" } " component-wise." } +{ $notes "Unlike " { $link bitxor } ", this word may be used on a specialized array of floats or doubles, in which case the bitwise representation of the floating point numbers is operated upon." } ; + HELP: norm-sq { $values { "v" "a sequence of numbers" } { "x" "a non-negative real number" } } { $description "Computes the squared length of a mathematical vector." } ; diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index eb97fba663..4b6f67544a 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2005, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: arrays alien.c-types kernel sequences math math.functions -hints math.order fry combinators ; +hints math.order math.libm fry combinators ; QUALIFIED-WITH: alien.c-types c IN: math.vectors @@ -44,7 +44,7 @@ PRIVATE> : vs* ( u v -- w ) [ * ] 2saturate-map ; : vabs ( u -- v ) [ abs ] map ; -: vsqrt ( u -- v ) [ sqrt ] map ; +: vsqrt ( u -- v ) [ >float fsqrt ] map ; : vbitor ( u v -- w ) over '[ _ [ bitor ] fp-bitwise-op ] 2map ; : vbitxor ( u v -- w ) over '[ _ [ bitxor ] fp-bitwise-op ] 2map ; -: vfloor ( v -- _v_ ) [ floor ] map ; -: vceiling ( v -- ^v^ ) [ ceiling ] map ; -: vtruncate ( v -- -v- ) [ truncate ] map ; +: vfloor ( u -- v ) [ floor ] map ; +: vceiling ( u -- v ) [ ceiling ] map ; +: vtruncate ( u -- v ) [ truncate ] map ; : vsupremum ( seq -- vmax ) [ ] [ vmax ] map-reduce ; : vinfimum ( seq -- vmin ) [ ] [ vmin ] map-reduce ; diff --git a/core/math/math-docs.factor b/core/math/math-docs.factor index e5de106bbb..e6805d693b 100644 --- a/core/math/math-docs.factor +++ b/core/math/math-docs.factor @@ -434,11 +434,15 @@ HELP: byte-array>bignum { $description "Converts a byte-array, interpreted as little-endian, into a bignum integer. User code should call " { $link le> } " or " { $link be> } " instead." } ; ARTICLE: "division-by-zero" "Division by zero" -"Floating point division never raises an error if the denominator is zero. This means that if at least one of the two inputs to " { $link / } ", " { $link /f } " or " { $link mod } " is a float, the result will be a floating point infinity or not a number value." +"Behavior of division operations when a denominator of zero is used depends on the data types in question, as well as the platform being used." +$nl +"Floating point division only throws an error if the appropriate traps are enabled in the floating point environment. If traps are disabled, a Not-a-number value or an infinity is output, depending on whether the numerator is zero or non-zero." +$nl +"Floating point traps are disabled by default and the " { $vocab-link "math.floats.env" } " vocabulary provides words to enable them. Floating point division is performed by " { $link / } ", " { $link /f } " or " { $link mod } " if at least one of the two inputs is a float. Floating point division is always performed by " { $link /f } "." $nl "The behavior of integer division is hardware specific. On x86 processors, " { $link /i } " and " { $link mod } " raise an error if both inputs are integers and the denominator is zero. On PowerPC, integer division by zero yields a result of zero." $nl -"On the other hand, the " { $link / } " word, when given integer arguments, implements a much more expensive division algorithm which always yields an exact rational answer, and this word always tests for division by zero explicitly." ; +"The " { $link / } " word, when given integer arguments, implements a much more expensive division algorithm which always yields an exact rational answer, and this word always tests for division by zero explicitly." ; ARTICLE: "number-protocol" "Number protocol" "Math operations obey certain numerical upgrade rules. If one of the inputs is a bignum and the other is a fixnum, the latter is first coerced to a bignum; if one of the inputs is a float, the other is coerced to a float." @@ -459,7 +463,8 @@ $nl { $subsection > } { $subsection >= } "Numbers can be compared for equality using " { $link = } ", or a less precise test which disregards types:" -{ $subsection number= } ; +{ $subsection number= } +{ $see-also "math.floats.compare" } ; ARTICLE: "modular-arithmetic" "Modular arithmetic" { $subsection mod } From 43acea72b994317329e215c7ff798291625c20ed Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 23 Sep 2009 03:08:54 -0500 Subject: [PATCH 13/15] specialized-vectors: improved documentation --- basis/specialized-vectors/specialized-vectors-docs.factor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basis/specialized-vectors/specialized-vectors-docs.factor b/basis/specialized-vectors/specialized-vectors-docs.factor index 9c575fe73a..c773356a64 100644 --- a/basis/specialized-vectors/specialized-vectors-docs.factor +++ b/basis/specialized-vectors/specialized-vectors-docs.factor @@ -16,8 +16,8 @@ ARTICLE: "specialized-vector-words" "Specialized vector words" } "Behind the scenes, these words are placed in a vocabulary named " { $snippet "specialized-vectors.instances.T" } ", however this vocabulary should not be placed in a " { $link POSTPONE: USING: } " form directly. Instead, always use " { $link POSTPONE: SPECIALIZED-VECTOR: } ". This ensures that the vocabulary can get generated the first time it is needed." ; -ARTICLE: "specialized-vector-c" "Passing specialized arrays to C functions" -"Each specialized array has a " { $slot "underlying" } " slot holding a specialized array, which in turn has an " { $slot "underlying" } " slot holding a " { $link byte-array } " with the raw data. Passing a specialized vector as a parameter to a C function call will automatically extract the underlying data. To get at the underlying data directly, call the " { $link >c-ptr } " word on a specialized vector." ; +ARTICLE: "specialized-vector-c" "Passing specialized vectors to C functions" +"Each specialized vector has a " { $slot "underlying" } " slot holding a specialized array, which in turn has an " { $slot "underlying" } " slot holding a " { $link byte-array } " with the raw data. Passing a specialized vector as a parameter to a C function call will automatically extract the underlying data. To get at the underlying data directly, call the " { $link >c-ptr } " word on a specialized vector." ; ARTICLE: "specialized-vectors" "Specialized vectors" "The " { $vocab-link "specialized-vectors" } " vocabulary implements resizable sequence types for storing machine values in a space-efficient manner without boxing." From f017f8904b46643c3d6f83ad90b3d4a0e24a9c3e Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 23 Sep 2009 03:14:06 -0500 Subject: [PATCH 14/15] math.vectors.simd: Improved documentation --- basis/math/vectors/simd/simd-docs.factor | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index 6d212f8e2b..02ed7f5ff8 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -17,13 +17,17 @@ $nl "There should never be any reason to use " { $link "math.vectors.simd.intrinsics" } " directly, but they too have a straightforward, but lower-level, interface." ; ARTICLE: "math.vectors.simd.support" "Supported SIMD instruction sets and operations" -"At present, the SIMD support makes use of SSE, SSE2 and a few SSE3 instructions on x86 CPUs." +"At present, the SIMD support makes use of a subset of SSE up to SSE4.1. The subset used depends on the current CPU type." $nl "SSE1 only supports single-precision SIMD (" { $snippet "float-4" } " and " { $snippet "float-8" } ")." $nl -"SSE2 introduces double-precision and integer SIMD." +"SSE2 introduces double-precision SIMD (" { $snippet "double-2" } " and " { $snippet "double-4" } ") and integer SIMD (all types). Integer SIMD in missing a few features, in particular the " { $link vmin } " and " { $link vmax } " operations only work on " { $snippet "uchar-16" } " and " { $snippet "short-8" } "." $nl -"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } ". If SSE3 is not available, software fallbacks are used for " { $link sum } " and related words." +"SSE3 introduces horizontal adds (summing all components of a single vector register), which is useful for computing dot products. Where available, SSE3 operations are used to speed up " { $link sum } ", " { $link v. } ", " { $link norm-sq } ", " { $link norm } ", and " { $link distance } "." +$nl +"SSSE3 introduces " { $link vabs } " for " { $snippet "char-16" } ", " { $snippet "short-8" } " and " { $snippet "int-4" } "." +$nl +"SSE4.1 introduces " { $link vmin } " and " { $link vmax } " for all remaining integer types." $nl "On PowerPC, or older x86 chips without SSE, software fallbacks are used for all high-level vector operations. SIMD code can run with no loss in functionality, just decreased performance." $nl From 6d4e5af088780383c1a1e6c8dfe21142deaac2b9 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 23 Sep 2009 05:13:15 -0500 Subject: [PATCH 15/15] cpu.x86.features: don't fold away sse-version, instead memoize it and recompute on startup --- basis/cpu/x86/features/features.factor | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/basis/cpu/x86/features/features.factor b/basis/cpu/x86/features/features.factor index 5fad4e802c..b21aa762d8 100644 --- a/basis/cpu/x86/features/features.factor +++ b/basis/cpu/x86/features/features.factor @@ -13,16 +13,18 @@ FUNCTION: longlong read_timestamp_counter ( ) ; PRIVATE> -: sse-version ( -- n ) +MEMO: sse-version ( -- n ) sse_version - "sse-version" get string>number [ min ] when* ; foldable + "sse-version" get string>number [ min ] when* ; -: sse? ( -- ? ) sse-version 10 >= ; foldable -: sse2? ( -- ? ) sse-version 20 >= ; foldable -: sse3? ( -- ? ) sse-version 30 >= ; foldable -: ssse3? ( -- ? ) sse-version 33 >= ; foldable -: sse4.1? ( -- ? ) sse-version 41 >= ; foldable -: sse4.2? ( -- ? ) sse-version 42 >= ; foldable +[ \ sse-version reset-memoized ] "cpu.x86.features" add-init-hook + +: sse? ( -- ? ) sse-version 10 >= ; +: sse2? ( -- ? ) sse-version 20 >= ; +: sse3? ( -- ? ) sse-version 30 >= ; +: ssse3? ( -- ? ) sse-version 33 >= ; +: sse4.1? ( -- ? ) sse-version 41 >= ; +: sse4.2? ( -- ? ) sse-version 42 >= ; : sse-string ( version -- string ) {