diff --git a/extra/cuda/cuda.factor b/extra/cuda/cuda.factor index 2c09fd176f..9c9b74a9bb 100644 --- a/extra/cuda/cuda.factor +++ b/extra/cuda/cuda.factor @@ -34,21 +34,18 @@ dim-block dim-grid shared-size stream ; '[ cuda-context set _ call ] with-cuda-context ; inline : with-cuda ( launcher quot -- ) - init-cuda - [ H{ } clone cuda-memory-hashtable ] 2dip '[ - _ + init-cuda [ [ cuda-launcher set ] [ [ device>> ] [ device-flags>> ] bi ] bi - _ with-cuda-program - ] with-variable ; inline + ] [ with-cuda-program ] bi* ; inline : c-type>cuda-setter ( c-type -- n cuda-type ) { { [ dup a:int = ] [ drop 4 [ cuda-int* ] ] } { [ dup a:uint = ] [ drop 4 [ cuda-int* ] ] } { [ dup a:float = ] [ drop 4 [ cuda-float* ] ] } - { [ dup a:pointer? ] [ drop 4 [ ptr>> cuda-int* ] ] } - { [ dup a:void* = ] [ drop 4 [ ptr>> cuda-int* ] ] } + { [ dup a:pointer? ] [ drop 4 [ cuda-int* ] ] } + { [ dup a:void* = ] [ drop 4 [ cuda-int* ] ] } } cond ; : run-function-launcher ( function-launcher function -- ) diff --git a/extra/cuda/demos/hello-world/hello-world.factor b/extra/cuda/demos/hello-world/hello-world.factor index 789948be68..1c9b8a51f7 100644 --- a/extra/cuda/demos/hello-world/hello-world.factor +++ b/extra/cuda/demos/hello-world/hello-world.factor @@ -3,7 +3,7 @@ USING: accessors alien.c-types alien.strings cuda cuda.devices cuda.memory cuda.syntax cuda.utils destructors io io.encodings.string io.encodings.utf8 kernel locals math -math.parser namespaces sequences ; +math.parser namespaces sequences byte-arrays strings ; IN: cuda.demos.hello-world CUDA-LIBRARY: hello vocab:cuda/demos/hello-world/hello.ptx @@ -12,12 +12,14 @@ CUDA-FUNCTION: helloWorld ( char* string-ptr ) ; : cuda-hello-world ( -- ) [ - cuda-launcher get device>> number>string - "CUDA device " ": " surround write - "Hello World!" [ - ] map-index host>device + [ + cuda-launcher get device>> number>string + "CUDA device " ": " surround write + "Hello World!" >byte-array [ - ] map-index host>device &cuda-free - [ { 6 1 1 } { 2 1 } 2<<< helloWorld ] - [ device>host utf8 decode print ] bi + [ { 6 1 1 } { 2 1 } 2<<< helloWorld ] + [ 12 device>host >string print ] bi + ] with-destructors ] with-each-cuda-device ; MAIN: cuda-hello-world diff --git a/extra/cuda/memory/memory.factor b/extra/cuda/memory/memory.factor index 1ababcb8a0..248682aecc 100644 --- a/extra/cuda/memory/memory.factor +++ b/extra/cuda/memory/memory.factor @@ -1,75 +1,48 @@ ! Copyright (C) 2010 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien alien.data assocs byte-arrays cuda.ffi -cuda.utils destructors io.encodings.string io.encodings.utf8 -kernel locals namespaces sequences strings ; -QUALIFIED-WITH: alien.c-types a +USING: accessors alien alien.data alien.destructors assocs +byte-arrays cuda.ffi cuda.utils destructors fry io.encodings.string +io.encodings.utf8 kernel locals namespaces sequences strings ; +QUALIFIED-WITH: alien.c-types c IN: cuda.memory -SYMBOL: cuda-memory-hashtable - -TUPLE: cuda-memory < disposable ptr length ; - -: ( ptr length -- obj ) - cuda-memory new-disposable - swap >>length - swap >>ptr ; - -: add-cuda-memory ( obj -- obj ) - dup dup ptr>> cuda-memory-hashtable get set-at ; - -: delete-cuda-memory ( obj -- ) - cuda-memory-hashtable delete-at ; - -ERROR: invalid-cuda-memory ptr ; - -: cuda-memory-length ( cuda-memory -- n ) - ptr>> cuda-memory-hashtable get ?at [ - length>> - ] [ - invalid-cuda-memory - ] if ; - -M: cuda-memory byte-length length>> ; - : cuda-malloc ( n -- ptr ) [ CUdeviceptr ] dip - [ cuMemAlloc cuda-error ] 2keep - [ a:*int ] dip add-cuda-memory ; + '[ _ cuMemAlloc cuda-error ] keep + c:*int ; -: cuda-free* ( ptr -- ) +: cuda-free ( ptr -- ) cuMemFree cuda-error ; -M: cuda-memory dispose ( ptr -- ) - ptr>> cuda-free* ; +DESTRUCTOR: cuda-free : memcpy-device>device ( dest-ptr src-ptr count -- ) - cuMemcpyDtoD cuda-error ; + cuMemcpyDtoD cuda-error ; inline : memcpy-device>array ( dest-array dest-index src-ptr count -- ) - cuMemcpyDtoA cuda-error ; + cuMemcpyDtoA cuda-error ; inline : memcpy-array>device ( dest-ptr src-array src-index count -- ) - cuMemcpyAtoD cuda-error ; + cuMemcpyAtoD cuda-error ; inline : memcpy-array>host ( dest-ptr src-array src-index count -- ) - cuMemcpyAtoH cuda-error ; + cuMemcpyAtoH cuda-error ; inline : memcpy-host>array ( dest-array dest-index src-ptr count -- ) - cuMemcpyHtoA cuda-error ; + cuMemcpyHtoA cuda-error ; inline : memcpy-array>array ( dest-array dest-index src-array src-ptr count -- ) - cuMemcpyAtoA cuda-error ; + cuMemcpyAtoA cuda-error ; inline -GENERIC: host>device ( obj -- ptr ) +: memcpy-host>device ( dest-ptr src-ptr count -- ) + cuMemcpyHtoD cuda-error ; inline -M: string host>device utf8 encode host>device ; +: memcpy-device>host ( dest-ptr src-ptr count -- ) + cuMemcpyDtoH cuda-error ; inline -M: byte-array host>device ( byte-array -- ptr ) - [ length cuda-malloc ] keep - [ [ ptr>> ] dip dup length cuMemcpyHtoD cuda-error ] - [ drop ] 2bi ; +: host>device ( data -- ptr ) + [ >c-ptr ] [ byte-length ] bi + [ nip cuda-malloc dup ] [ memcpy-host>device ] 2bi ; inline -:: device>host ( ptr -- seq ) - ptr byte-length - [ ptr [ ptr>> ] [ byte-length ] bi cuMemcpyDtoH cuda-error ] keep ; +: device>host ( ptr len -- byte-array ) + [ nip dup ] [ memcpy-device>host ] 2bi ; inline