From 420221129392e3c71790443a443af2dd4a89265e Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 28 Sep 2009 16:38:35 -0500 Subject: [PATCH] cpu.x86: cleanups --- basis/cpu/x86/assembler/assembler.factor | 4 +++ basis/cpu/x86/x86.factor | 31 ++++++++++-------------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/basis/cpu/x86/assembler/assembler.factor b/basis/cpu/x86/assembler/assembler.factor index c097fe2b4d..57738ce4ba 100644 --- a/basis/cpu/x86/assembler/assembler.factor +++ b/basis/cpu/x86/assembler/assembler.factor @@ -618,11 +618,15 @@ ALIAS: PINSRQ PINSRD : MOVDQA ( dest src -- ) { HEX: 6f HEX: 7f } HEX: 66 2-operand-rm-mr-sse ; : MOVDQU ( dest src -- ) { HEX: 6f HEX: 7f } HEX: f3 2-operand-rm-mr-sse ; + + : PSHUFD ( dest src imm -- ) 4shuffler HEX: 70 HEX: 66 3-operand-rm-sse ; : PSHUFLW ( dest src imm -- ) 4shuffler HEX: 70 HEX: f2 3-operand-rm-sse ; : PSHUFHW ( dest src imm -- ) 4shuffler HEX: 70 HEX: f3 3-operand-rm-sse ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 4540db1053..857e1ede6b 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -579,12 +579,12 @@ MACRO: available-reps ( alist -- ) '[ _ cond ] ; : unsign-rep ( rep -- rep' ) - dup { + { { uint-4-rep int-4-rep } { ulonglong-2-rep longlong-2-rep } { ushort-8-rep short-8-rep } { uchar-16-rep char-16-rep } - } at* [ nip ] [ drop ] if ; + } ?at drop ; M:: x86 %broadcast-vector ( dst src rep -- ) rep unsign-rep { @@ -592,22 +592,23 @@ M:: x86 %broadcast-vector ( dst src rep -- ) dst src float-4-rep %copy dst dst { 0 0 0 0 } SHUFPS ] } - { double-2-rep [ + { double-2-rep [ dst src MOVDDUP ] } { longlong-2-rep [ - dst src = [ - dst dst PUNPCKLQDQ - ] [ - dst src { 0 1 0 1 } PSHUFD - ] if + dst src = + [ dst dst PUNPCKLQDQ ] + [ dst src { 0 1 0 1 } PSHUFD ] + if ] } - { int-4-rep [ dst src { 0 0 0 0 } PSHUFD ] } - { short-8-rep [ + { int-4-rep [ + dst src { 0 0 0 0 } PSHUFD + ] } + { short-8-rep [ dst src { 0 0 0 0 } PSHUFLW dst dst PUNPCKLQDQ ] } - { char-16-rep [ + { char-16-rep [ dst src char-16-rep %copy dst dst PUNPCKLBW dst dst { 0 0 0 0 } PSHUFLW @@ -619,13 +620,7 @@ M: x86 %broadcast-vector-reps { ! Can't do this with sse1 since it will want to unbox ! a double-precision float and convert to single precision - { sse2? { - float-4-rep double-2-rep - longlong-2-rep ulonglong-2-rep - int-4-rep uint-4-rep - short-8-rep ushort-8-rep - char-16-rep uchar-16-rep - } } + { sse2? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } } } available-reps ; M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )