diff --git a/basis/cpu/x86/assembler/assembler-tests.factor b/basis/cpu/x86/assembler/assembler-tests.factor index 47d6434279..531110da7b 100644 --- a/basis/cpu/x86/assembler/assembler-tests.factor +++ b/basis/cpu/x86/assembler/assembler-tests.factor @@ -56,8 +56,24 @@ IN: cpu.x86.assembler.tests ! [ { HEX: f2 HEX: 41 HEX: 0f HEX: 11 HEX: 04 HEX: 24 } ] [ [ R12 [] XMM0 MOVSD ] { } make ] unit-test ! 3-operand r-rm-imm sse instructions -[ { HEX: 66 HEX: 0f HEX: 70 HEX: c1 HEX: 02 } ] [ [ XMM0 XMM1 2 PSHUFD ] { } make ] unit-test -[ { HEX: 0f HEX: c6 HEX: c1 HEX: 02 } ] [ [ XMM0 XMM1 2 SHUFPS ] { } make ] unit-test +[ { HEX: 66 HEX: 0f HEX: 70 HEX: c1 HEX: 02 } ] +[ [ XMM0 XMM1 2 PSHUFD ] { } make ] unit-test + +[ { HEX: 0f HEX: c6 HEX: c1 HEX: 02 } ] +[ [ XMM0 XMM1 2 SHUFPS ] { } make ] unit-test + +! shufflers with arrays of indexes +[ { HEX: 66 HEX: 0f HEX: 70 HEX: c1 HEX: 02 } ] +[ [ XMM0 XMM1 { 2 0 0 0 } PSHUFD ] { } make ] unit-test + +[ { HEX: 0f HEX: c6 HEX: c1 HEX: 63 } ] +[ [ XMM0 XMM1 { 3 0 2 1 } SHUFPS ] { } make ] unit-test + +[ { HEX: 66 HEX: 0f HEX: c6 HEX: c1 HEX: 2 } ] +[ [ XMM0 XMM1 { 0 1 } SHUFPD ] { } make ] unit-test + +[ { HEX: 66 HEX: 0f HEX: c6 HEX: c1 HEX: 1 } ] +[ [ XMM0 XMM1 { 1 0 } SHUFPD ] { } make ] unit-test ! scalar register insert/extract sse instructions [ { HEX: 66 HEX: 0f HEX: c4 HEX: c1 HEX: 02 } ] [ [ XMM0 ECX 2 PINSRW ] { } make ] unit-test diff --git a/basis/cpu/x86/assembler/assembler.factor b/basis/cpu/x86/assembler/assembler.factor index ceb9c54e6e..c097fe2b4d 100644 --- a/basis/cpu/x86/assembler/assembler.factor +++ b/basis/cpu/x86/assembler/assembler.factor @@ -1,8 +1,9 @@ ! Copyright (C) 2005, 2009 Slava Pestov, Joe Groff. ! See http://factorcode.org/license.txt for BSD license. -USING: arrays io.binary kernel combinators kernel.private math locals -namespaces make sequences words system layouts math.order accessors -cpu.x86.assembler.operands cpu.x86.assembler.operands.private ; +USING: arrays io.binary kernel combinators kernel.private math +math.bitwise locals namespaces make sequences words system +layouts math.order accessors cpu.x86.assembler.operands +cpu.x86.assembler.operands.private ; QUALIFIED: sequences IN: cpu.x86.assembler @@ -617,9 +618,14 @@ ALIAS: PINSRQ PINSRD : MOVDQA ( dest src -- ) { HEX: 6f HEX: 7f } HEX: 66 2-operand-rm-mr-sse ; : MOVDQU ( dest src -- ) { HEX: 6f HEX: 7f } HEX: f3 2-operand-rm-mr-sse ; -: PSHUFD ( dest src imm -- ) HEX: 70 HEX: 66 3-operand-rm-sse ; -: PSHUFLW ( dest src imm -- ) HEX: 70 HEX: f2 3-operand-rm-sse ; -: PSHUFHW ( dest src imm -- ) HEX: 70 HEX: f3 3-operand-rm-sse ; +: 2shuffler ( indexes/mask -- mask ) + dup integer? [ first2 { 1 0 } bitfield ] unless ; +: 4shuffler ( indexes/mask -- mask ) + dup integer? [ first4 { 6 4 2 0 } bitfield ] unless ; + +: PSHUFD ( dest src imm -- ) 4shuffler HEX: 70 HEX: 66 3-operand-rm-sse ; +: PSHUFLW ( dest src imm -- ) 4shuffler HEX: 70 HEX: f2 3-operand-rm-sse ; +: PSHUFHW ( dest src imm -- ) 4shuffler HEX: 70 HEX: f3 3-operand-rm-sse ; : MOVNTI ( dest src -- ) { HEX: 0f HEX: c3 } (2-operand) ; : PINSRW ( dest src imm -- ) HEX: c4 HEX: 66 3-operand-rm-sse ; -: SHUFPS ( dest src imm -- ) HEX: c6 f 3-operand-rm-sse ; -: SHUFPD ( dest src imm -- ) HEX: c6 HEX: 66 3-operand-rm-sse ; +: SHUFPS ( dest src imm -- ) 4shuffler HEX: c6 f 3-operand-rm-sse ; +: SHUFPD ( dest src imm -- ) 2shuffler HEX: c6 HEX: 66 3-operand-rm-sse ; : ADDSUBPD ( dest src -- ) HEX: d0 HEX: 66 2-operand-rm-sse ; : ADDSUBPS ( dest src -- ) HEX: d0 HEX: f2 2-operand-rm-sse ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 6e51520030..2668379b37 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -590,22 +590,25 @@ M:: x86 %broadcast-vector ( dst src rep -- ) rep unsign-rep { { float-4-rep [ dst src float-4-rep %copy - dst dst 0 SHUFPS + dst dst { 0 0 0 0 } SHUFPS ] } { double-2-rep [ dst src double-2-rep %copy dst dst UNPCKLPD ] } - { longlong-2-rep [ dst src BIN: 01000100 PSHUFD ] } - { int-4-rep [ dst src 0 PSHUFD ] } + { longlong-2-rep [ + dst src longlong-2-rep %copy + dst dst PUNPCKLQDQ + ] } + { int-4-rep [ dst src { 0 0 0 0 } PSHUFD ] } { short-8-rep [ - dst src 0 PSHUFLW + dst src { 0 0 0 0 } PSHUFLW dst dst PUNPCKLQDQ ] } { char-16-rep [ dst src char-16-rep %copy dst dst PUNPCKLBW - dst dst 0 PSHUFLW + dst dst { 0 0 0 0 } PSHUFLW dst dst PUNPCKLQDQ ] } } case ;