From 1624903ae16c40d0e374d162edfd113b67d05dd5 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Mon, 19 Apr 2010 00:46:03 -0500 Subject: [PATCH] Split up cuda vocab some more, make CUDA-LIBRARY: work --- extra/cuda/cuda.factor | 320 ++---------------- .../cuda/demos/hello-world/hello-world.factor | 23 +- extra/cuda/demos/prefix-sum/prefix-sum.factor | 9 +- extra/cuda/memory/authors.txt | 1 + extra/cuda/memory/memory.factor | 74 ++++ extra/cuda/syntax/syntax.factor | 11 +- extra/cuda/utils/authors.txt | 1 + extra/cuda/utils/utils.factor | 204 +++++++++++ 8 files changed, 316 insertions(+), 327 deletions(-) create mode 100644 extra/cuda/memory/authors.txt create mode 100644 extra/cuda/memory/memory.factor create mode 100644 extra/cuda/utils/authors.txt create mode 100644 extra/cuda/utils/utils.factor diff --git a/extra/cuda/cuda.factor b/extra/cuda/cuda.factor index d8b6f2e2ce..94e10a96dd 100644 --- a/extra/cuda/cuda.factor +++ b/extra/cuda/cuda.factor @@ -2,324 +2,42 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien alien.data alien.parser alien.strings alien.syntax arrays assocs byte-arrays classes.struct -combinators continuations cuda.ffi destructors fry io -io.backend io.encodings.string io.encodings.utf8 kernel lexer -locals macros math math.parser namespaces nested-comments -opengl.gl.extensions parser prettyprint quotations sequences -words ; +combinators continuations cuda.ffi cuda.memory cuda.utils +destructors fry io io.backend io.encodings.string +io.encodings.utf8 kernel lexer locals macros math math.parser +namespaces nested-comments opengl.gl.extensions parser +prettyprint quotations sequences words ; QUALIFIED-WITH: alien.c-types a IN: cuda -SYMBOL: cuda-device -SYMBOL: cuda-context -SYMBOL: cuda-module -SYMBOL: cuda-function -SYMBOL: cuda-launcher -SYMBOL: cuda-memory-hashtable - -SYMBOL: cuda-libraries -cuda-libraries [ H{ } clone ] initialize - -SYMBOL: cuda-functions - -TUPLE: cuda-library name path ; - -: ( name path -- obj ) - \ cuda-library new - swap >>path - swap >>name ; - -: add-cuda-library ( name path -- ) - normalize-path - dup name>> cuda-libraries get set-at ; - -: cuda-library ( name -- cuda-library ) - cuda-libraries get at ; - -ERROR: throw-cuda-error n ; - -: cuda-error ( n -- ) - dup CUDA_SUCCESS = [ drop ] [ throw-cuda-error ] if ; - -: cuda-version ( -- n ) - a:int [ cuDriverGetVersion cuda-error ] keep a:*int ; - -: init-cuda ( -- ) - 0 cuInit cuda-error ; - TUPLE: launcher { device integer initial: 0 } -{ device-flags initial: 0 } -path ; +{ device-flags initial: 0 } ; TUPLE: function-launcher -dim-block -dim-grid -shared-size -stream ; +dim-block dim-grid shared-size stream ; : with-cuda-context ( flags device quot -- ) + H{ } clone cuda-modules set-global H{ } clone cuda-functions set - [ - [ CUcontext ] 2dip - [ cuCtxCreate cuda-error ] 3keep 2drop a:*void* - ] dip + [ create-context ] dip [ '[ _ @ ] ] - [ drop '[ _ cuCtxDestroy cuda-error ] ] 2bi + [ drop '[ _ destroy-context ] ] 2bi [ ] cleanup ; inline -: with-cuda-module ( path quot -- ) - [ - normalize-path - [ CUmodule ] dip - [ cuModuleLoad cuda-error ] 2keep drop a:*void* - ] dip - [ '[ _ @ ] ] - [ drop '[ _ cuModuleUnload cuda-error ] ] 2bi - [ ] cleanup ; inline - -: with-cuda-program ( flags device path quot -- ) +: with-cuda-program ( flags device quot -- ) [ dup cuda-device set ] 2dip - '[ - cuda-context set - _ [ - cuda-module set - _ call - ] with-cuda-module - ] with-cuda-context ; inline + '[ cuda-context set _ call ] with-cuda-context ; inline : with-cuda ( launcher quot -- ) - [ - init-cuda - H{ } clone cuda-memory-hashtable - ] 2dip '[ + init-cuda + [ H{ } clone cuda-memory-hashtable ] 2dip '[ _ [ cuda-launcher set ] - [ [ device>> ] [ device-flags>> ] [ path>> ] tri ] bi + [ [ device>> ] [ device-flags>> ] bi ] bi _ with-cuda-program ] with-variable ; inline - [ cuDeviceGetCount cuda-error ] keep a:*int ; - -: n>cuda-device ( n -- device ) - [ CUdevice ] dip [ cuDeviceGet cuda-error ] 2keep drop a:*int ; - -: enumerate-cuda-devices ( -- devices ) - #cuda-devices iota [ n>cuda-device ] map ; - -: cuda-device-properties ( device -- properties ) - [ CUdevprop ] dip - [ cuDeviceGetProperties cuda-error ] 2keep drop - CUdevprop memory>struct ; - -PRIVATE> - -: cuda-devices ( -- assoc ) - enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ; - -: cuda-device-name ( n -- string ) - [ 256 [ ] keep ] dip - [ cuDeviceGetName cuda-error ] - [ 2drop utf8 alien>string ] 3bi ; - -: cuda-device-capability ( n -- pair ) - [ a:int a:int ] dip - [ cuDeviceComputeCapability cuda-error ] - [ drop [ a:*int ] bi@ ] 3bi 2array ; - -: cuda-device-memory ( n -- bytes ) - [ a:uint ] dip - [ cuDeviceTotalMem cuda-error ] - [ drop a:*uint ] 2bi ; - -: get-function-ptr* ( module string -- function ) - [ CUfunction ] 2dip - [ cuModuleGetFunction cuda-error ] 3keep 2drop a:*void* ; - -: get-function-ptr ( string -- function ) - [ cuda-module get ] dip get-function-ptr* ; - -: with-cuda-function ( string quot -- ) - [ - get-function-ptr* cuda-function set - ] dip call ; inline - -: cached-cuda-function ( string -- alien ) - cuda-functions get [ get-function-ptr ] cache ; - -: launch-function* ( function -- ) cuLaunch cuda-error ; - -: launch-function ( -- ) cuda-function get cuLaunch cuda-error ; - -: launch-function-grid* ( function width height -- ) - cuLaunchGrid cuda-error ; - -: launch-function-grid ( width height -- ) - [ cuda-function get ] 2dip - cuLaunchGrid cuda-error ; - -TUPLE: cuda-memory < disposable ptr length ; - -: ( ptr length -- obj ) - cuda-memory new-disposable - swap >>length - swap >>ptr ; - -: add-cuda-memory ( obj -- obj ) - dup dup ptr>> cuda-memory-hashtable get set-at ; - -: delete-cuda-memory ( obj -- ) - cuda-memory-hashtable delete-at ; - -ERROR: invalid-cuda-memory ptr ; - -: cuda-memory-length ( cuda-memory -- n ) - ptr>> cuda-memory-hashtable get ?at [ - length>> - ] [ - invalid-cuda-memory - ] if ; - -M: cuda-memory byte-length length>> ; - -: cuda-malloc ( n -- ptr ) - [ CUdeviceptr ] dip - [ cuMemAlloc cuda-error ] 2keep - [ a:*int ] dip add-cuda-memory ; - -: cuda-free* ( ptr -- ) - cuMemFree cuda-error ; - -M: cuda-memory dispose ( ptr -- ) - ptr>> cuda-free* ; - -: host>device ( dest-ptr src-ptr -- ) - [ ptr>> ] dip dup length cuMemcpyHtoD cuda-error ; - -:: device>host ( ptr -- seq ) - ptr byte-length - [ ptr [ ptr>> ] [ byte-length ] bi cuMemcpyDtoH cuda-error ] keep ; - -: memcpy-device>device ( dest-ptr src-ptr count -- ) - cuMemcpyDtoD cuda-error ; - -: memcpy-device>array ( dest-array dest-index src-ptr count -- ) - cuMemcpyDtoA cuda-error ; - -: memcpy-array>device ( dest-ptr src-array src-index count -- ) - cuMemcpyAtoD cuda-error ; - -: memcpy-array>host ( dest-ptr src-array src-index count -- ) - cuMemcpyAtoH cuda-error ; - -: memcpy-host>array ( dest-array dest-index src-ptr count -- ) - cuMemcpyHtoA cuda-error ; - -: memcpy-array>array ( dest-array dest-index src-array src-ptr count -- ) - cuMemcpyAtoA cuda-error ; - -: cuda-int* ( function offset value -- ) - cuParamSeti cuda-error ; - -: cuda-int ( offset value -- ) - [ cuda-function get ] 2dip cuda-int* ; - -: cuda-float* ( function offset value -- ) - cuParamSetf cuda-error ; - -: cuda-float ( offset value -- ) - [ cuda-function get ] 2dip cuda-float* ; - -: cuda-vector* ( function offset ptr n -- ) - cuParamSetv cuda-error ; - -: cuda-vector ( offset ptr n -- ) - [ cuda-function get ] 3dip cuda-vector* ; - -: param-size* ( function n -- ) - cuParamSetSize cuda-error ; - -: param-size ( n -- ) - [ cuda-function get ] dip param-size* ; - -: malloc-device-string ( string -- n ) - utf8 encode - [ length cuda-malloc ] keep - [ host>device ] [ drop ] 2bi ; - -ERROR: bad-cuda-parameter parameter ; - -:: set-parameters ( seq -- ) - cuda-function get :> function - 0 :> offset! - seq [ - [ offset ] dip - { - { [ dup cuda-memory? ] [ ptr>> cuda-int ] } - { [ dup float? ] [ cuda-float ] } - { [ dup integer? ] [ cuda-int ] } - [ bad-cuda-parameter ] - } cond - offset 4 + offset! - ] each - offset param-size ; - -: cuda-device-attribute ( attribute dev -- n ) - [ a:int ] 2dip - [ cuDeviceGetAttribute cuda-error ] - [ 2drop a:*int ] 3bi ; - -: function-block-shape* ( function x y z -- ) - cuFuncSetBlockShape cuda-error ; - -: function-block-shape ( x y z -- ) - [ cuda-function get ] 3dip - cuFuncSetBlockShape cuda-error ; - -: function-shared-size* ( function n -- ) - cuFuncSetSharedSize cuda-error ; - -: function-shared-size ( n -- ) - [ cuda-function get ] dip - cuFuncSetSharedSize cuda-error ; - -: launch ( -- ) - cuda-launcher get { - [ block-shape>> first3 function-block-shape ] - [ shared-size>> function-shared-size ] - [ - grid>> [ - launch-function - ] [ - first2 launch-function-grid - ] if-empty - ] - } cleave ; - -: cuda-device. ( n -- ) - { - [ "Device: " write number>string print ] - [ "Name: " write cuda-device-name print ] - [ "Memory: " write cuda-device-memory number>string print ] - [ - "Capability: " write - cuda-device-capability [ number>string ] map " " join print - ] - [ "Properties: " write cuda-device-properties . ] - [ - "CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: " write - CU_DEVICE_ATTRIBUTE_GPU_OVERLAP swap - cuda-device-attribute number>string print - ] - } cleave ; - -: cuda. ( -- ) - "CUDA Version: " write cuda-version number>string print nl - #cuda-devices iota [ nl ] [ cuda-device. ] interleave ; - : c-type>cuda-setter ( c-type -- n cuda-type ) { { [ dup a:int = ] [ drop 4 [ cuda-int* ] ] } @@ -353,13 +71,13 @@ MACRO: cuda-arguments ( c-types -- quot: ( args... function -- ) ) swap '[ _ param-size* ] suffix '[ _ cleave ] ; -: define-cuda-word ( word string arguments -- ) +: define-cuda-word ( word module-name function-name arguments -- ) [ '[ - _ get-function-ptr + _ _ cached-function [ nip _ cuda-arguments ] [ run-function-launcher ] 2bi ] ] - [ nip \ function-launcher suffix a:void function-effect ] - 2bi define-declared ; + [ 2nip \ function-launcher suffix a:void function-effect ] + 3bi define-declared ; diff --git a/extra/cuda/demos/hello-world/hello-world.factor b/extra/cuda/demos/hello-world/hello-world.factor index 540c4b9148..8855ce6fea 100644 --- a/extra/cuda/demos/hello-world/hello-world.factor +++ b/extra/cuda/demos/hello-world/hello-world.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2010 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.c-types alien.strings cuda cuda.syntax destructors -io.encodings.utf8 kernel locals math prettyprint sequences ; +USING: alien.c-types alien.strings cuda cuda.memory cuda.syntax +destructors io io.encodings.utf8 kernel locals math sequences ; IN: cuda.demos.hello-world CUDA-LIBRARY: hello vocab:cuda/demos/hello-world/hello.ptx @@ -9,22 +9,13 @@ CUDA-LIBRARY: hello vocab:cuda/demos/hello-world/hello.ptx CUDA-FUNCTION: helloWorld ( char* string-ptr ) ; :: cuda-hello-world ( -- ) - T{ launcher - { device 0 } - { path "vocab:cuda/demos/hello-world/hello.ptx" } - } [ - "Hello World!" [ - ] map-index malloc-device-string &dispose dup :> str + T{ launcher { device 0 } } [ + "Hello World!" [ - ] map-index malloc-device-string + &dispose dup :> str - T{ function-launcher - { dim-block { 6 1 1 } } - { dim-grid { 2 1 } } - { shared-size 0 } - } - helloWorld + { 6 1 1 } { 2 1 } 1 3<<< helloWorld - ! <<< { 6 1 1 } { 2 1 } 1 >>> helloWorld - - str device>host utf8 alien>string . + str device>host utf8 alien>string print ] with-cuda ; MAIN: cuda-hello-world diff --git a/extra/cuda/demos/prefix-sum/prefix-sum.factor b/extra/cuda/demos/prefix-sum/prefix-sum.factor index 2cd8eba166..c7e59b515a 100644 --- a/extra/cuda/demos/prefix-sum/prefix-sum.factor +++ b/extra/cuda/demos/prefix-sum/prefix-sum.factor @@ -8,14 +8,9 @@ CUDA-LIBRARY: prefix-sum vocab:cuda/demos/prefix-sum/prefix-sum.ptx CUDA-FUNCTION: prefix_sum_block ( uint* in, uint* out, uint n ) ; :: cuda-prefix-sum ( -- ) - T{ launcher - { device 0 } - { path "vocab:cuda/demos/prefix-sum/prefix-sum.ptx" } - } [ - - + T{ launcher { device 0 } } + [ ! { 1 1 1 } { 2 1 } 0 3<<< prefix_sum_block - ] with-cuda ; MAIN: cuda-prefix-sum diff --git a/extra/cuda/memory/authors.txt b/extra/cuda/memory/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/extra/cuda/memory/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/extra/cuda/memory/memory.factor b/extra/cuda/memory/memory.factor new file mode 100644 index 0000000000..c3dfe56a53 --- /dev/null +++ b/extra/cuda/memory/memory.factor @@ -0,0 +1,74 @@ +! Copyright (C) 2010 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors alien alien.data assocs byte-arrays cuda.ffi +cuda.utils destructors io.encodings.string io.encodings.utf8 +kernel locals namespaces sequences ; +QUALIFIED-WITH: alien.c-types a +IN: cuda.memory + +SYMBOL: cuda-memory-hashtable + +TUPLE: cuda-memory < disposable ptr length ; + +: ( ptr length -- obj ) + cuda-memory new-disposable + swap >>length + swap >>ptr ; + +: add-cuda-memory ( obj -- obj ) + dup dup ptr>> cuda-memory-hashtable get set-at ; + +: delete-cuda-memory ( obj -- ) + cuda-memory-hashtable delete-at ; + +ERROR: invalid-cuda-memory ptr ; + +: cuda-memory-length ( cuda-memory -- n ) + ptr>> cuda-memory-hashtable get ?at [ + length>> + ] [ + invalid-cuda-memory + ] if ; + +M: cuda-memory byte-length length>> ; + +: cuda-malloc ( n -- ptr ) + [ CUdeviceptr ] dip + [ cuMemAlloc cuda-error ] 2keep + [ a:*int ] dip add-cuda-memory ; + +: cuda-free* ( ptr -- ) + cuMemFree cuda-error ; + +M: cuda-memory dispose ( ptr -- ) + ptr>> cuda-free* ; + +: memcpy-device>device ( dest-ptr src-ptr count -- ) + cuMemcpyDtoD cuda-error ; + +: memcpy-device>array ( dest-array dest-index src-ptr count -- ) + cuMemcpyDtoA cuda-error ; + +: memcpy-array>device ( dest-ptr src-array src-index count -- ) + cuMemcpyAtoD cuda-error ; + +: memcpy-array>host ( dest-ptr src-array src-index count -- ) + cuMemcpyAtoH cuda-error ; + +: memcpy-host>array ( dest-array dest-index src-ptr count -- ) + cuMemcpyHtoA cuda-error ; + +: memcpy-array>array ( dest-array dest-index src-array src-ptr count -- ) + cuMemcpyAtoA cuda-error ; + +: host>device ( dest-ptr src-ptr -- ) + [ ptr>> ] dip dup length cuMemcpyHtoD cuda-error ; + +:: device>host ( ptr -- seq ) + ptr byte-length + [ ptr [ ptr>> ] [ byte-length ] bi cuMemcpyDtoH cuda-error ] keep ; + +: malloc-device-string ( string -- n ) + utf8 encode + [ length cuda-malloc ] keep + [ host>device ] [ drop ] 2bi ; diff --git a/extra/cuda/syntax/syntax.factor b/extra/cuda/syntax/syntax.factor index b8df30f61c..1cd5edb9d4 100644 --- a/extra/cuda/syntax/syntax.factor +++ b/extra/cuda/syntax/syntax.factor @@ -1,12 +1,17 @@ ! Copyright (C) 2010 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.parser cuda kernel lexer parser ; +USING: alien.parser cuda cuda.utils io.backend kernel lexer +namespaces parser ; IN: cuda.syntax -SYNTAX: CUDA-LIBRARY: scan scan add-cuda-library ; +SYNTAX: CUDA-LIBRARY: + scan scan normalize-path + [ add-cuda-library ] + [ drop current-cuda-library set-global ] 2bi ; SYNTAX: CUDA-FUNCTION: - scan [ create-in ] [ ] bi ";" scan-c-args drop define-cuda-word ; + scan [ create-in current-cuda-library get ] [ ] bi + ";" scan-c-args drop define-cuda-word ; : 3<<< ( dim-block dim-grid shared-size -- function-launcher ) f function-launcher boa ; diff --git a/extra/cuda/utils/authors.txt b/extra/cuda/utils/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/extra/cuda/utils/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/extra/cuda/utils/utils.factor b/extra/cuda/utils/utils.factor new file mode 100644 index 0000000000..b10f42e8d2 --- /dev/null +++ b/extra/cuda/utils/utils.factor @@ -0,0 +1,204 @@ +! Copyright (C) 2010 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors alien.data alien.strings arrays assocs +byte-arrays classes.struct combinators cuda.ffi io io.backend +io.encodings.utf8 kernel math.parser namespaces prettyprint +sequences ; +QUALIFIED-WITH: alien.c-types a +IN: cuda.utils + +SYMBOL: cuda-device +SYMBOL: cuda-context +SYMBOL: cuda-module +SYMBOL: cuda-function +SYMBOL: cuda-launcher + +SYMBOL: cuda-modules +SYMBOL: cuda-functions + +ERROR: throw-cuda-error n ; + +: cuda-error ( n -- ) + dup CUDA_SUCCESS = [ drop ] [ throw-cuda-error ] if ; + +: init-cuda ( -- ) + 0 cuInit cuda-error ; + +: cuda-version ( -- n ) + a:int [ cuDriverGetVersion cuda-error ] keep a:*int ; + +: #cuda-devices ( -- n ) + a:int [ cuDeviceGetCount cuda-error ] keep a:*int ; + +: n>cuda-device ( n -- device ) + [ CUdevice ] dip [ cuDeviceGet cuda-error ] 2keep drop a:*int ; + +: enumerate-cuda-devices ( -- devices ) + #cuda-devices iota [ n>cuda-device ] map ; + +: cuda-device-properties ( device -- properties ) + [ CUdevprop ] dip + [ cuDeviceGetProperties cuda-error ] 2keep drop + CUdevprop memory>struct ; + +: cuda-devices ( -- assoc ) + enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ; + +: cuda-device-name ( n -- string ) + [ 256 [ ] keep ] dip + [ cuDeviceGetName cuda-error ] + [ 2drop utf8 alien>string ] 3bi ; + +: cuda-device-capability ( n -- pair ) + [ a:int a:int ] dip + [ cuDeviceComputeCapability cuda-error ] + [ drop [ a:*int ] bi@ ] 3bi 2array ; + +: cuda-device-memory ( n -- bytes ) + [ a:uint ] dip + [ cuDeviceTotalMem cuda-error ] + [ drop a:*uint ] 2bi ; + +: get-function-ptr* ( module string -- function ) + [ CUfunction ] 2dip + [ cuModuleGetFunction cuda-error ] 3keep 2drop a:*void* ; + +: get-function-ptr ( string -- function ) + [ cuda-module get ] dip get-function-ptr* ; + +: with-cuda-function ( string quot -- ) + [ + get-function-ptr* cuda-function set + ] dip call ; inline + +: create-context ( flags device -- context ) + [ CUcontext ] 2dip + [ cuCtxCreate cuda-error ] 3keep 2drop a:*void* ; + +: destroy-context ( context -- ) cuCtxDestroy cuda-error ; + +SYMBOL: cuda-libraries +cuda-libraries [ H{ } clone ] initialize + +SYMBOL: current-cuda-library + +TUPLE: cuda-library name path handle ; + +: ( name path -- obj ) + \ cuda-library new + swap >>path + swap >>name ; + +: add-cuda-library ( name path -- ) + normalize-path + dup name>> cuda-libraries get-global set-at ; + +: ?delete-at ( key assoc -- old/key ? ) + 2dup delete-at* [ 2nip t ] [ 2drop f ] if ; inline + +ERROR: no-cuda-library name ; + +: load-module ( path -- module ) + [ CUmodule ] dip + [ cuModuleLoad cuda-error ] 2keep drop a:*void* ; + +: unload-module ( module -- ) + cuModuleUnload cuda-error ; + +: load-cuda-library ( library -- handle ) + path>> load-module ; + +: lookup-cuda-library ( name -- cuda-library ) + cuda-libraries get ?at [ no-cuda-library ] unless ; + +: remove-cuda-library ( name -- library ) + cuda-libraries get ?delete-at [ no-cuda-library ] unless ; + +: unload-cuda-library ( name -- ) + remove-cuda-library handle>> unload-module ; + + +: cached-module ( module-name -- alien ) + lookup-cuda-library + cuda-modules get-global [ load-cuda-library ] cache ; + +: cached-function ( module-name function-name -- alien ) + [ cached-module ] dip + 2array cuda-functions get [ first2 get-function-ptr* ] cache ; + +: launch-function* ( function -- ) cuLaunch cuda-error ; + +: launch-function ( -- ) cuda-function get cuLaunch cuda-error ; + +: cuda-int* ( function offset value -- ) + cuParamSeti cuda-error ; + +: cuda-int ( offset value -- ) + [ cuda-function get ] 2dip cuda-int* ; + +: cuda-float* ( function offset value -- ) + cuParamSetf cuda-error ; + +: cuda-float ( offset value -- ) + [ cuda-function get ] 2dip cuda-float* ; + +: cuda-vector* ( function offset ptr n -- ) + cuParamSetv cuda-error ; + +: cuda-vector ( offset ptr n -- ) + [ cuda-function get ] 3dip cuda-vector* ; + +: param-size* ( function n -- ) + cuParamSetSize cuda-error ; + +: param-size ( n -- ) + [ cuda-function get ] dip param-size* ; + +: launch-function-grid* ( function width height -- ) + cuLaunchGrid cuda-error ; + +: launch-function-grid ( width height -- ) + [ cuda-function get ] 2dip + cuLaunchGrid cuda-error ; + +ERROR: bad-cuda-parameter parameter ; + +: cuda-device-attribute ( attribute dev -- n ) + [ a:int ] 2dip + [ cuDeviceGetAttribute cuda-error ] + [ 2drop a:*int ] 3bi ; + +: function-block-shape* ( function x y z -- ) + cuFuncSetBlockShape cuda-error ; + +: function-block-shape ( x y z -- ) + [ cuda-function get ] 3dip + cuFuncSetBlockShape cuda-error ; + +: function-shared-size* ( function n -- ) + cuFuncSetSharedSize cuda-error ; + +: function-shared-size ( n -- ) + [ cuda-function get ] dip + cuFuncSetSharedSize cuda-error ; + +: cuda-device. ( n -- ) + { + [ "Device: " write number>string print ] + [ "Name: " write cuda-device-name print ] + [ "Memory: " write cuda-device-memory number>string print ] + [ + "Capability: " write + cuda-device-capability [ number>string ] map " " join print + ] + [ "Properties: " write cuda-device-properties . ] + [ + "CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: " write + CU_DEVICE_ATTRIBUTE_GPU_OVERLAP swap + cuda-device-attribute number>string print + ] + } cleave ; + +: cuda. ( -- ) + "CUDA Version: " write cuda-version number>string print nl + #cuda-devices iota [ nl ] [ cuda-device. ] interleave ;