From 23cf6413dc9a34c4f0f0d381bb6d0e05f428e098 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sun, 18 Apr 2010 11:54:22 -0700 Subject: [PATCH 1/2] cuda.ptx: some unit tests --- extra/cuda/ptx/ptx-tests.factor | 114 ++++++++++++++++++++++++++++++++ extra/cuda/ptx/ptx.factor | 32 ++++++--- 2 files changed, 136 insertions(+), 10 deletions(-) create mode 100644 extra/cuda/ptx/ptx-tests.factor diff --git a/extra/cuda/ptx/ptx-tests.factor b/extra/cuda/ptx/ptx-tests.factor new file mode 100644 index 0000000000..877bc82811 --- /dev/null +++ b/extra/cuda/ptx/ptx-tests.factor @@ -0,0 +1,114 @@ +USING: cuda.ptx tools.test ; +IN: cuda.ptx.tests + +[ """ .version 2.0 + .target sm_20 +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20, .texmode_independent +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } { texmode .texmode_independent } } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_11, map_f64_to_f32 +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target + { arch sm_11 } + { map_f64_to_f32? t } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_11, map_f64_to_f32, .texmode_independent +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target + { arch sm_11 } + { map_f64_to_f32? t } + { texmode .texmode_independent } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + .global .f32 foo[9000]; + .extern .align 16 .shared .v4.f32 bar[]; + .func (.reg .f32 sum) zap (.reg .f32 a, .reg .f32 b) + { + add.rn.f32 sum, a, b; + ret; + } + .func frob (.align 8 .param .u64 in, .align 8 .param .u64 out, .align 8 .param .u64 len) + { + ret; + } + .func twib + { + ret; + } +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ ptx-variable + { storage-space .global } + { type .f32 } + { name "foo" } + { dim 9000 } + } + T{ ptx-variable + { extern? t } + { align 16 } + { storage-space .shared } + { type T{ .v4 f .f32 } } + { name "bar" } + { dim 0 } + } + T{ ptx-func + { return T{ ptx-variable { storage-space .reg } { type .f32 } { name "sum" } } } + { name "zap" } + { params { + T{ ptx-variable { storage-space .reg } { type .f32 } { name "a" } } + T{ ptx-variable { storage-space .reg } { type .f32 } { name "b" } } + } } + { body { + T{ add { round .rn } { type .f32 } { dest "sum" } { a "a" } { b "b" } } + T{ ret } + } } + } + T{ ptx-func + { name "frob" } + { params { + T{ ptx-variable { align 8 } { storage-space .param } { type .u64 } { name "in" } } + T{ ptx-variable { align 8 } { storage-space .param } { type .u64 } { name "out" } } + T{ ptx-variable { align 8 } { storage-space .param } { type .u64 } { name "len" } } + } } + { body { + T{ ret } + } } + } + T{ ptx-func + { name "twib" } + { body { + T{ ret } + } } + } + } } + } ptx>string +] unit-test diff --git a/extra/cuda/ptx/ptx.factor b/extra/cuda/ptx/ptx.factor index 8d4925d55f..8a30659640 100644 --- a/extra/cuda/ptx/ptx.factor +++ b/extra/cuda/ptx/ptx.factor @@ -1,6 +1,6 @@ ! (c)2010 Joe Groff bsd license -USING: accessors arrays combinators io kernel math math.parser -roles sequences strings variants words ; +USING: accessors arrays combinators io io.streams.string kernel +math math.parser roles sequences strings variants words ; FROM: roles => TUPLE: ; IN: cuda.ptx @@ -62,6 +62,7 @@ TUPLE: ptx-variable { parameter ?integer } { dim dim } { initializer ?string } ; +UNION: ?ptx-variable POSTPONE: f ptx-variable ; TUPLE: ptx-predicate { negated? boolean } @@ -79,7 +80,7 @@ TUPLE: ptx-entry body ; TUPLE: ptx-func < ptx-entry - { return ptx-variable } ; + { return ?ptx-variable } ; TUPLE: ptx-directive ; @@ -331,15 +332,23 @@ TUPLE: xor < ptx-3op-instruction ; GENERIC: ptx-element-label ( elt -- label ) M: object ptx-element-label drop f ; +GENERIC: ptx-semicolon? ( elt -- ? ) +M: object ptx-semicolon? drop t ; +M: ptx-target ptx-semicolon? drop f ; +M: ptx-entry ptx-semicolon? drop f ; +M: ptx-func ptx-semicolon? drop f ; +M: .file ptx-semicolon? drop f ; +M: .loc ptx-semicolon? drop f ; + GENERIC: (write-ptx-element) ( elt -- ) : write-ptx-element ( elt -- ) dup ptx-element-label [ write ":" write ] when* - "\t" write (write-ptx-element) - ";" print ; + "\t" write dup (write-ptx-element) + ptx-semicolon? [ ";" print ] [ nl ] if ; : write-ptx ( ptx -- ) - "\t.version " write dup version>> write ";" print + "\t.version " write dup version>> print dup target>> write-ptx-element body>> [ write-ptx-element ] each ; @@ -399,9 +408,9 @@ M: ptx-variable (write-ptx-element) "\t}" write ; : write-entry ( entry -- ) - dup name>> write " " write - dup params>> [ write-params ] when* nl - dup directives>> [ (write-ptx-element) ] each nl + dup name>> write + dup params>> [ " " write write-params ] when* nl + dup directives>> [ (write-ptx-element) nl ] each dup body>> write-body drop ; @@ -754,5 +763,8 @@ M: vote (write-ptx-element) dup mode>> (write-ptx-element) write-2op ; M: xor (write-ptx-element) - "or" write-insn + "xor" write-insn write-3op ; + +: ptx>string ( ptx -- string ) + [ write-ptx ] with-string-writer ; From 717dd1b10ea31d19ebadee83f8d9cdb147dd85ac Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Mon, 19 Apr 2010 00:40:10 -0700 Subject: [PATCH 2/2] cuda.ptx: unit tests for instruction serialization --- extra/cuda/ptx/ptx-tests.factor | 977 ++++++++++++++++++++++++++++++++ extra/cuda/ptx/ptx.factor | 28 +- 2 files changed, 995 insertions(+), 10 deletions(-) diff --git a/extra/cuda/ptx/ptx-tests.factor b/extra/cuda/ptx/ptx-tests.factor index 877bc82811..28391a5f58 100644 --- a/extra/cuda/ptx/ptx-tests.factor +++ b/extra/cuda/ptx/ptx-tests.factor @@ -112,3 +112,980 @@ IN: cuda.ptx.tests } } } ptx>string ] unit-test + +[ """ .version 2.0 + .target sm_20 + abs.s32 a, b; + @p abs.s32 a, b; + @!p abs.s32 a, b; +foo: abs.s32 a, b; + abs.ftz.f32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ abs { type .s32 } { dest "a" } { a "b" } } + T{ abs + { predicate T{ ptx-predicate { variable "p" } } } + { type .s32 } { dest "a" } { a "b" } + } + T{ abs + { predicate T{ ptx-predicate { negated? t } { variable "p" } } } + { type .s32 } { dest "a" } { a "b" } + } + T{ abs + { label "foo" } + { type .s32 } { dest "a" } { a "b" } + } + T{ abs { type .f32 } { dest "a" } { a "b" } { ftz? t } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + add.s32 a, b, c; + add.cc.s32 a, b, c; + add.sat.s32 a, b, c; + add.ftz.f32 a, b, c; + add.ftz.sat.f32 a, b, c; + add.rz.sat.f32 a, b, c; + add.rz.ftz.sat.f32 a, b, c; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ add { type .s32 } { dest "a" } { a "b" } { b "c" } } + T{ add { cc? t } { type .s32 } { dest "a" } { a "b" } { b "c" } } + T{ add { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } } + T{ add { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ add { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ add { round .rz } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ add { round .rz } { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + addc.s32 a, b, c; + addc.cc.s32 a, b, c; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ addc { type .s32 } { dest "a" } { a "b" } { b "c" } } + T{ addc { cc? t } { type .s32 } { dest "a" } { a "b" } { b "c" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + and.b32 a, b, c; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ and { type .b32 } { dest "a" } { a "b" } { b "c" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + atom.and.u32 a, [b], c; + atom.global.or.u32 a, [b], c; + atom.shared.cas.u32 a, [b], c, d; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ atom { op .and } { type .u32 } { dest "a" } { a "[b]" } { b "c" } } + T{ atom { storage-space .global } { op .or } { type .u32 } { dest "a" } { a "[b]" } { b "c" } } + T{ atom { storage-space .shared } { op .cas } { type .u32 } { dest "a" } { a "[b]" } { b "c" } { c "d" } } + + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + bar.arrive a, b; + bar.red.popc.u32 a, b, d; + bar.red.popc.u32 a, b, !d; + bar.red.popc.u32 a, b, c, !d; + bar.sync a; + bar.sync a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ bar.arrive { a "a" } { b "b" } } + T{ bar.red { op .popc } { type .u32 } { dest "a" } { a "b" } { c "d" } } + T{ bar.red { op .popc } { type .u32 } { dest "a" } { a "b" } { c "!d" } } + T{ bar.red { op .popc } { type .u32 } { dest "a" } { a "b" } { b "c" } { c "!d" } } + T{ bar.sync { a "a" } } + T{ bar.sync { a "a" } { b "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + bfe.u32 a, b, c, d; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ bfe { type .u32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + bfi.u32 a, b, c, d, e; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ bfi { type .u32 } { dest "a" } { a "b" } { b "c" } { c "d" } { d "e" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + bfind.u32 a, b; + bfind.shiftamt.u32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ bfind { type .u32 } { dest "a" } { a "b" } } + T{ bfind { type .u32 } { shiftamt? t } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + bra foo; + bra.uni bar; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ bra { target "foo" } } + T{ bra { uni? t } { target "bar" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + brev.b32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ brev { type .b32 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + brkpt; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ brkpt } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + call foo; + call.uni foo; + call (a), foo; + call (a), foo, (b); + call (a), foo, (b, c); + call (a), foo, (b, c, d); + call foo, (b, c, d); +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ call { target "foo" } } + T{ call { uni? t } { target "foo" } } + T{ call { return "a" } { target "foo" } } + T{ call { return "a" } { target "foo" } { params { "b" } } } + T{ call { return "a" } { target "foo" } { params { "b" "c" } } } + T{ call { return "a" } { target "foo" } { params { "b" "c" "d" } } } + T{ call { target "foo" } { params { "b" "c" "d" } } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + clz.b32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ clz { type .b32 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + cnot.b32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ cnot { type .b32 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + copysign.f64 a, b, c; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ copysign { type .f64 } { dest "a" } { a "b" } { b "c" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + cos.approx.f32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ cos { round .approx } { type .f32 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + cvt.f32.s32 a, b; + cvt.s32.f32 a, b; + cvt.rp.f32.f64 a, b; + cvt.rpi.s32.f32 a, b; + cvt.ftz.f32.f64 a, b; + cvt.sat.f32.f64 a, b; + cvt.ftz.sat.f32.f64 a, b; + cvt.rp.ftz.sat.f32.f64 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ cvt { dest-type .f32 } { type .s32 } { dest "a" } { a "b" } } + T{ cvt { dest-type .s32 } { type .f32 } { dest "a" } { a "b" } } + T{ cvt { round .rp } { dest-type .f32 } { type .f64 } { dest "a" } { a "b" } } + T{ cvt { round .rpi } { dest-type .s32 } { type .f32 } { dest "a" } { a "b" } } + T{ cvt { ftz? t } { dest-type .f32 } { type .f64 } { dest "a" } { a "b" } } + T{ cvt { sat? t } { dest-type .f32 } { type .f64 } { dest "a" } { a "b" } } + T{ cvt { ftz? t } { sat? t } { dest-type .f32 } { type .f64 } { dest "a" } { a "b" } } + T{ cvt { round .rp } { ftz? t } { sat? t } { dest-type .f32 } { type .f64 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + cvta.global.u64 a, b; + cvta.shared.u64 a, b; + cvta.to.shared.u64 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ cvta { storage-space .global } { type .u64 } { dest "a" } { a "b" } } + T{ cvta { storage-space .shared } { type .u64 } { dest "a" } { a "b" } } + T{ cvta { to? t } { storage-space .shared } { type .u64 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + div.u32 a, b, c; + div.approx.f32 a, b, c; + div.approx.ftz.f32 a, b, c; + div.full.f32 a, b, c; + div.full.ftz.f32 a, b, c; + div.f32 a, b, c; + div.rz.f32 a, b, c; + div.ftz.f32 a, b, c; + div.rz.ftz.f32 a, b, c; + div.f64 a, b, c; + div.rz.f64 a, b, c; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ div { type .u32 } { dest "a" } { a "b" } { b "c" } } + T{ div { round .approx } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ div { round .approx } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ div { round .full } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ div { round .full } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ div { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ div { round .rz } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ div { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ div { round .rz } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ div { type .f64 } { dest "a" } { a "b" } { b "c" } } + T{ div { round .rz } { type .f64 } { dest "a" } { a "b" } { b "c" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + ex2.approx.f32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ ex2 { round .approx } { type .f32 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + exit; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ exit } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + fma.f32 a, b, c, d; + fma.sat.f32 a, b, c, d; + fma.ftz.f32 a, b, c, d; + fma.ftz.sat.f32 a, b, c, d; + fma.rz.sat.f32 a, b, c, d; + fma.rz.ftz.sat.f32 a, b, c, d; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ fma { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ fma { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ fma { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ fma { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ fma { round .rz } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ fma { round .rz } { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + isspacep.shared a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ isspacep { storage-space .shared } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + ld.u32 a, [b]; + ld.v2.u32 a, [b]; + ld.v4.u32 a, [b]; + ld.v4.u32 {a, b, c, d}, [e]; + ld.lu.u32 a, [b]; + ld.const.lu.u32 a, [b]; + ld.volatile.const[5].u32 a, [b]; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ ld { type .u32 } { dest "a" } { a "[b]" } } + T{ ld { type T{ .v2 { of .u32 } } } { dest "a" } { a "[b]" } } + T{ ld { type T{ .v4 { of .u32 } } } { dest "a" } { a "[b]" } } + T{ ld { type T{ .v4 { of .u32 } } } { dest "{a, b, c, d}" } { a "[e]" } } + T{ ld { cache-op .lu } { type .u32 } { dest "a" } { a "[b]" } } + T{ ld { storage-space T{ .const } } { cache-op .lu } { type .u32 } { dest "a" } { a "[b]" } } + T{ ld { volatile? t } { storage-space T{ .const { bank 5 } } } { type .u32 } { dest "a" } { a "[b]" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + ldu.u32 a, [b]; + ldu.v2.u32 a, [b]; + ldu.v4.u32 a, [b]; + ldu.v4.u32 {a, b, c, d}, [e]; + ldu.lu.u32 a, [b]; + ldu.const.lu.u32 a, [b]; + ldu.volatile.const[5].u32 a, [b]; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ ldu { type .u32 } { dest "a" } { a "[b]" } } + T{ ldu { type T{ .v2 { of .u32 } } } { dest "a" } { a "[b]" } } + T{ ldu { type T{ .v4 { of .u32 } } } { dest "a" } { a "[b]" } } + T{ ldu { type T{ .v4 { of .u32 } } } { dest "{a, b, c, d}" } { a "[e]" } } + T{ ldu { cache-op .lu } { type .u32 } { dest "a" } { a "[b]" } } + T{ ldu { storage-space T{ .const } } { cache-op .lu } { type .u32 } { dest "a" } { a "[b]" } } + T{ ldu { volatile? t } { storage-space T{ .const { bank 5 } } } { type .u32 } { dest "a" } { a "[b]" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + lg2.approx.f32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ lg2 { round .approx } { type .f32 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + mad.s32 a, b, c, d; + mad.lo.s32 a, b, c, d; + mad.sat.s32 a, b, c, d; + mad.hi.sat.s32 a, b, c, d; + mad.ftz.f32 a, b, c, d; + mad.ftz.sat.f32 a, b, c, d; + mad.rz.sat.f32 a, b, c, d; + mad.rz.ftz.sat.f32 a, b, c, d; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ mad { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ mad { mode .lo } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ mad { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ mad { mode .hi } { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ mad { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ mad { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ mad { round .rz } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ mad { round .rz } { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + mad24.s32 a, b, c, d; + mad24.lo.s32 a, b, c, d; + mad24.sat.s32 a, b, c, d; + mad24.hi.sat.s32 a, b, c, d; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ mad24 { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ mad24 { mode .lo } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ mad24 { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ mad24 { mode .hi } { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + neg.s32 a, b; + neg.f32 a, b; + neg.ftz.f32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ neg { type .s32 } { dest "a" } { a "b" } } + T{ neg { type .f32 } { dest "a" } { a "b" } } + T{ neg { ftz? t } { type .f32 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + not.b32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ not { type .b32 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + or.b32 a, b, c; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ or { type .b32 } { dest "a" } { a "b" } { b "c" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + pmevent a; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ pmevent { a "a" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + popc.b64 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ popc { type .b64 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + prefetch.L1 [a]; + prefetch.local.L2 [a]; + prefetchu.L1 [a]; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ prefetch { level .L1 } { a "[a]" } } + T{ prefetch { storage-space .local } { level .L2 } { a "[a]" } } + T{ prefetchu { level .L1 } { a "[a]" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + prmt.b32 a, b, c, d; + prmt.b32.f4e a, b, c, d; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ prmt { type .b32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ prmt { type .b32 } { mode .f4e } { dest "a" } { a "b" } { b "c" } { c "d" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + rcp.approx.f32 a, b; + rcp.approx.ftz.f32 a, b; + rcp.f32 a, b; + rcp.rz.f32 a, b; + rcp.ftz.f32 a, b; + rcp.rz.ftz.f32 a, b; + rcp.f64 a, b; + rcp.rz.f64 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ rcp { round .approx } { type .f32 } { dest "a" } { a "b" } } + T{ rcp { round .approx } { ftz? t } { type .f32 } { dest "a" } { a "b" } } + T{ rcp { type .f32 } { dest "a" } { a "b" } } + T{ rcp { round .rz } { type .f32 } { dest "a" } { a "b" } } + T{ rcp { ftz? t } { type .f32 } { dest "a" } { a "b" } } + T{ rcp { round .rz } { ftz? t } { type .f32 } { dest "a" } { a "b" } } + T{ rcp { type .f64 } { dest "a" } { a "b" } } + T{ rcp { round .rz } { type .f64 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + red.and.u32 [a], b; + red.global.and.u32 [a], b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ red { op .and } { type .u32 } { dest "[a]" } { a "b" } } + T{ red { storage-space .global } { op .and } { type .u32 } { dest "[a]" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + rsqrt.approx.f32 a, b; + rsqrt.approx.ftz.f32 a, b; + rsqrt.approx.f64 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ rsqrt { round .approx } { type .f32 } { dest "a" } { a "b" } } + T{ rsqrt { round .approx } { ftz? t } { type .f32 } { dest "a" } { a "b" } } + T{ rsqrt { round .approx } { type .f64 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + rsqrt.approx.f32 a, b; + rsqrt.approx.ftz.f32 a, b; + rsqrt.approx.f64 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ rsqrt { round .approx } { type .f32 } { dest "a" } { a "b" } } + T{ rsqrt { round .approx } { ftz? t } { type .f32 } { dest "a" } { a "b" } } + T{ rsqrt { round .approx } { type .f64 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + sad.u32 a, b, c, d; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ sad { type .u32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + selp.u32 a, b, c, d; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ selp { type .u32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + set.gt.u32.s32 a, b, c; + set.gt.ftz.u32.f32 a, b, c; + set.gt.and.ftz.u32.f32 a, b, c, d; + set.gt.and.ftz.u32.f32 a, b, c, !d; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ set { cmp-op .gt } { dest-type .u32 } { type .s32 } { dest "a" } { a "b" } { b "c" } } + T{ set { cmp-op .gt } { ftz? t } { dest-type .u32 } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ set { cmp-op .gt } { bool-op .and } { ftz? t } { dest-type .u32 } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ set { cmp-op .gt } { bool-op .and } { ftz? t } { dest-type .u32 } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "!d" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + setp.gt.s32 a, b, c; + setp.gt.s32 a|z, b, c; + setp.gt.ftz.f32 a, b, c; + setp.gt.and.ftz.f32 a, b, c, d; + setp.gt.and.ftz.f32 a, b, c, !d; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ setp { cmp-op .gt } { type .s32 } { dest "a" } { a "b" } { b "c" } } + T{ setp { cmp-op .gt } { type .s32 } { dest "a" } { |dest "z" } { a "b" } { b "c" } } + T{ setp { cmp-op .gt } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ setp { cmp-op .gt } { bool-op .and } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ setp { cmp-op .gt } { bool-op .and } { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } { c "!d" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + shl.b32 a, b, c; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ shl { type .b32 } { dest "a" } { a "b" } { b "c" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + shr.b32 a, b, c; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ shr { type .b32 } { dest "a" } { a "b" } { b "c" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + sin.approx.f32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ sin { round .approx } { type .f32 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + slct.f32.s32 a, b, c, d; + slct.ftz.f32.s32 a, b, c, d; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ slct { dest-type .f32 } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + T{ slct { ftz? t } { dest-type .f32 } { type .s32 } { dest "a" } { a "b" } { b "c" } { c "d" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + sqrt.approx.f32 a, b; + sqrt.approx.ftz.f32 a, b; + sqrt.f32 a, b; + sqrt.rz.f32 a, b; + sqrt.ftz.f32 a, b; + sqrt.rz.ftz.f32 a, b; + sqrt.f64 a, b; + sqrt.rz.f64 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ sqrt { round .approx } { type .f32 } { dest "a" } { a "b" } } + T{ sqrt { round .approx } { ftz? t } { type .f32 } { dest "a" } { a "b" } } + T{ sqrt { type .f32 } { dest "a" } { a "b" } } + T{ sqrt { round .rz } { type .f32 } { dest "a" } { a "b" } } + T{ sqrt { ftz? t } { type .f32 } { dest "a" } { a "b" } } + T{ sqrt { round .rz } { ftz? t } { type .f32 } { dest "a" } { a "b" } } + T{ sqrt { type .f64 } { dest "a" } { a "b" } } + T{ sqrt { round .rz } { type .f64 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + st.u32 [a], b; + st.v2.u32 [a], b; + st.v4.u32 [a], b; + st.v4.u32 [a], {b, c, d, e}; + st.lu.u32 [a], b; + st.local.lu.u32 [a], b; + st.volatile.local.u32 [a], b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ st { type .u32 } { dest "[a]" } { a "b" } } + T{ st { type T{ .v2 { of .u32 } } } { dest "[a]" } { a "b" } } + T{ st { type T{ .v4 { of .u32 } } } { dest "[a]" } { a "b" } } + T{ st { type T{ .v4 { of .u32 } } } { dest "[a]" } { a "{b, c, d, e}" } } + T{ st { cache-op .lu } { type .u32 } { dest "[a]" } { a "b" } } + T{ st { storage-space .local } { cache-op .lu } { type .u32 } { dest "[a]" } { a "b" } } + T{ st { volatile? t } { storage-space .local } { type .u32 } { dest "[a]" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + sub.s32 a, b, c; + sub.cc.s32 a, b, c; + sub.sat.s32 a, b, c; + sub.ftz.f32 a, b, c; + sub.ftz.sat.f32 a, b, c; + sub.rz.sat.f32 a, b, c; + sub.rz.ftz.sat.f32 a, b, c; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ sub { type .s32 } { dest "a" } { a "b" } { b "c" } } + T{ sub { cc? t } { type .s32 } { dest "a" } { a "b" } { b "c" } } + T{ sub { sat? t } { type .s32 } { dest "a" } { a "b" } { b "c" } } + T{ sub { ftz? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ sub { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ sub { round .rz } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + T{ sub { round .rz } { ftz? t } { sat? t } { type .f32 } { dest "a" } { a "b" } { b "c" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + subc.s32 a, b, c; + subc.cc.s32 a, b, c; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ subc { type .s32 } { dest "a" } { a "b" } { b "c" } } + T{ subc { cc? t } { type .s32 } { dest "a" } { a "b" } { b "c" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + testp.finite.f32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ testp { op .finite } { type .f32 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + trap; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ trap } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + vote.all.pred a, b; + vote.all.pred a, !b; + vote.ballot.b32 a, b; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ vote { mode .all } { type .pred } { dest "a" } { a "b" } } + T{ vote { mode .all } { type .pred } { dest "a" } { a "!b" } } + T{ vote { mode .ballot } { type .b32 } { dest "a" } { a "b" } } + } } + } ptx>string +] unit-test + +[ """ .version 2.0 + .target sm_20 + xor.b32 a, b, c; +""" ] [ + T{ ptx + { version "2.0" } + { target T{ ptx-target { arch sm_20 } } } + { body { + T{ xor { type .b32 } { dest "a" } { a "b" } { b "c" } } + } } + } ptx>string +] unit-test + diff --git a/extra/cuda/ptx/ptx.factor b/extra/cuda/ptx/ptx.factor index 8a30659640..4618f8b5b6 100644 --- a/extra/cuda/ptx/ptx.factor +++ b/extra/cuda/ptx/ptx.factor @@ -242,7 +242,7 @@ TUPLE: cnot < ptx-2op-instruction ; TUPLE: copysign < ptx-3op-instruction ; TUPLE: cos <{ ptx-2op-instruction ptx-float-env } ; TUPLE: cvt < ptx-2op-instruction - { rounding-mode ?ptx-rounding-mode } + { round ?ptx-rounding-mode } { ftz? boolean } { sat? boolean } { dest-type ptx-type } ; @@ -254,7 +254,7 @@ TUPLE: ex2 <{ ptx-2op-instruction ptx-float-env } ; TUPLE: exit < ptx-instruction ; TUPLE: fma <{ ptx-mad-instruction ptx-float-env } ; TUPLE: isspacep < ptx-instruction - { storage-space ?ptx-storage-space } + { storage-space ptx-storage-space } { dest string } { a string } ; TUPLE: ld < ptx-ldst-instruction ; @@ -547,7 +547,7 @@ M: bar.red (write-ptx-element) dup b>> [ ", " write write ] when* ", " write c>> write ; M: bar.sync (write-ptx-element) - "bar.arrive " write-insn + "bar.sync " write-insn dup a>> write dup b>> [ ", " write write ] when* drop ; @@ -563,15 +563,16 @@ M: bfind (write-ptx-element) write-2op ; M: bra (write-ptx-element) "bra" write-insn - dup write-uni - " " write target>> write ; + dup write-uni " " write + target>> write ; M: brev (write-ptx-element) "brev" write-insn write-2op ; M: brkpt (write-ptx-element) "brkpt" write-insn drop ; M: call (write-ptx-element) - "call" write-insn " " write + "call" write-insn + dup write-uni " " write dup return>> [ "(" write write "), " write ] when* dup target>> write dup params>> [ ", (" write ", " join write ")" write ] unless-empty @@ -591,7 +592,7 @@ M: cos (write-ptx-element) write-2op ; M: cvt (write-ptx-element) "cvt" write-insn - dup rounding-mode>> (write-ptx-element) + dup round>> (write-ptx-element) dup write-ftz dup write-sat dup dest-type>> (write-ptx-element) @@ -685,12 +686,17 @@ M: prefetchu (write-ptx-element) " " write a>> write ; M: prmt (write-ptx-element) "prmt" write-insn - dup mode>> (write-ptx-element) - write-4op ; + dup type>> (write-ptx-element) + dup mode>> (write-ptx-element) " " write + dup dest>> write ", " write + dup a>> write ", " write + dup b>> write ", " write + dup c>> write + drop ; M: rcp (write-ptx-element) "rcp" write-insn dup write-float-env - write-3op ; + write-2op ; M: red (write-ptx-element) "red" write-insn dup storage-space>> (write-ptx-element) @@ -758,6 +764,8 @@ M: testp (write-ptx-element) "testp" write-insn dup op>> (write-ptx-element) write-2op ; +M: trap (write-ptx-element) + "trap" write-insn drop ; M: vote (write-ptx-element) "vote" write-insn dup mode>> (write-ptx-element)