From 5c17e6ee98424db6e9f6a55b655a7e6589707472 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sun, 18 Apr 2010 18:33:18 -0500 Subject: [PATCH] CUDA-FUNCTION: works, splitting up CUDA into more vocabs --- extra/cuda/cuda.factor | 131 +++++++++++++----- extra/cuda/demos/hello-world/authors.txt | 1 + .../cuda/demos/hello-world/hello-world.factor | 30 ++++ extra/cuda/{ => demos/hello-world}/hello.cu | 0 extra/cuda/{ => demos/hello-world}/hello.ptx | 0 extra/cuda/demos/prefix-sum/authors.txt | 2 + .../cuda/{ => demos/prefix-sum}/prefix-sum.cu | 0 extra/cuda/demos/prefix-sum/prefix-sum.factor | 21 +++ .../{ => demos/prefix-sum}/prefix-sum.ptx | 0 extra/cuda/syntax/authors.txt | 1 + extra/cuda/syntax/syntax.factor | 15 ++ 11 files changed, 164 insertions(+), 37 deletions(-) create mode 100644 extra/cuda/demos/hello-world/authors.txt create mode 100644 extra/cuda/demos/hello-world/hello-world.factor rename extra/cuda/{ => demos/hello-world}/hello.cu (100%) rename extra/cuda/{ => demos/hello-world}/hello.ptx (100%) create mode 100644 extra/cuda/demos/prefix-sum/authors.txt rename extra/cuda/{ => demos/prefix-sum}/prefix-sum.cu (100%) create mode 100644 extra/cuda/demos/prefix-sum/prefix-sum.factor rename extra/cuda/{ => demos/prefix-sum}/prefix-sum.ptx (100%) create mode 100644 extra/cuda/syntax/authors.txt create mode 100644 extra/cuda/syntax/syntax.factor diff --git a/extra/cuda/cuda.factor b/extra/cuda/cuda.factor index 6b343fb1cc..d8b6f2e2ce 100644 --- a/extra/cuda/cuda.factor +++ b/extra/cuda/cuda.factor @@ -1,11 +1,13 @@ ! Copyright (C) 2010 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien alien.c-types alien.data alien.parser -alien.strings arrays assocs byte-arrays classes.struct +USING: accessors alien alien.data alien.parser alien.strings +alien.syntax arrays assocs byte-arrays classes.struct combinators continuations cuda.ffi destructors fry io io.backend io.encodings.string io.encodings.utf8 kernel lexer -locals math math.parser namespaces opengl.gl.extensions -prettyprint quotations sequences ; +locals macros math math.parser namespaces nested-comments +opengl.gl.extensions parser prettyprint quotations sequences +words ; +QUALIFIED-WITH: alien.c-types a IN: cuda SYMBOL: cuda-device @@ -15,13 +17,32 @@ SYMBOL: cuda-function SYMBOL: cuda-launcher SYMBOL: cuda-memory-hashtable +SYMBOL: cuda-libraries +cuda-libraries [ H{ } clone ] initialize + +SYMBOL: cuda-functions + +TUPLE: cuda-library name path ; + +: ( name path -- obj ) + \ cuda-library new + swap >>path + swap >>name ; + +: add-cuda-library ( name path -- ) + normalize-path + dup name>> cuda-libraries get set-at ; + +: cuda-library ( name -- cuda-library ) + cuda-libraries get at ; + ERROR: throw-cuda-error n ; : cuda-error ( n -- ) dup CUDA_SUCCESS = [ drop ] [ throw-cuda-error ] if ; : cuda-version ( -- n ) - int [ cuDriverGetVersion cuda-error ] keep *int ; + a:int [ cuDriverGetVersion cuda-error ] keep a:*int ; : init-cuda ( -- ) 0 cuInit cuda-error ; @@ -29,12 +50,19 @@ ERROR: throw-cuda-error n ; TUPLE: launcher { device integer initial: 0 } { device-flags initial: 0 } -path block-shape shared-size grid ; +path ; + +TUPLE: function-launcher +dim-block +dim-grid +shared-size +stream ; : with-cuda-context ( flags device quot -- ) + H{ } clone cuda-functions set [ [ CUcontext ] 2dip - [ cuCtxCreate cuda-error ] 3keep 2drop *void* + [ cuCtxCreate cuda-error ] 3keep 2drop a:*void* ] dip [ '[ _ @ ] ] [ drop '[ _ cuCtxDestroy cuda-error ] ] 2bi @@ -44,7 +72,7 @@ path block-shape shared-size grid ; [ normalize-path [ CUmodule ] dip - [ cuModuleLoad cuda-error ] 2keep drop *void* + [ cuModuleLoad cuda-error ] 2keep drop a:*void* ] dip [ '[ _ @ ] ] [ drop '[ _ cuModuleUnload cuda-error ] ] 2bi @@ -74,10 +102,10 @@ path block-shape shared-size grid ; [ cuDeviceGetCount cuda-error ] keep *int ; + a:int [ cuDeviceGetCount cuda-error ] keep a:*int ; : n>cuda-device ( n -- device ) - [ CUdevice ] dip [ cuDeviceGet cuda-error ] 2keep drop *int ; + [ CUdevice ] dip [ cuDeviceGet cuda-error ] 2keep drop a:*int ; : enumerate-cuda-devices ( -- devices ) #cuda-devices iota [ n>cuda-device ] map ; @@ -98,27 +126,30 @@ PRIVATE> [ 2drop utf8 alien>string ] 3bi ; : cuda-device-capability ( n -- pair ) - [ int int ] dip + [ a:int a:int ] dip [ cuDeviceComputeCapability cuda-error ] - [ drop [ *int ] bi@ ] 3bi 2array ; + [ drop [ a:*int ] bi@ ] 3bi 2array ; : cuda-device-memory ( n -- bytes ) - [ uint ] dip + [ a:uint ] dip [ cuDeviceTotalMem cuda-error ] - [ drop *uint ] 2bi ; + [ drop a:*uint ] 2bi ; -: get-cuda-function* ( module string -- function ) +: get-function-ptr* ( module string -- function ) [ CUfunction ] 2dip - [ cuModuleGetFunction cuda-error ] 3keep 2drop *void* ; + [ cuModuleGetFunction cuda-error ] 3keep 2drop a:*void* ; -: get-cuda-function ( string -- function ) - [ cuda-module get ] dip get-cuda-function* ; +: get-function-ptr ( string -- function ) + [ cuda-module get ] dip get-function-ptr* ; : with-cuda-function ( string quot -- ) [ - get-cuda-function cuda-function set + get-function-ptr* cuda-function set ] dip call ; inline +: cached-cuda-function ( string -- alien ) + cuda-functions get [ get-function-ptr ] cache ; + : launch-function* ( function -- ) cuLaunch cuda-error ; : launch-function ( -- ) cuda-function get cuLaunch cuda-error ; @@ -157,7 +188,7 @@ M: cuda-memory byte-length length>> ; : cuda-malloc ( n -- ptr ) [ CUdeviceptr ] dip [ cuMemAlloc cuda-error ] 2keep - [ *int ] dip add-cuda-memory ; + [ a:*int ] dip add-cuda-memory ; : cuda-free* ( ptr -- ) cuMemFree cuda-error ; @@ -237,9 +268,9 @@ ERROR: bad-cuda-parameter parameter ; offset param-size ; : cuda-device-attribute ( attribute dev -- n ) - [ int ] 2dip + [ a:int ] 2dip [ cuDeviceGetAttribute cuda-error ] - [ 2drop *int ] 3bi ; + [ 2drop a:*int ] 3bi ; : function-block-shape* ( function x y z -- ) cuFuncSetBlockShape cuda-error ; @@ -289,20 +320,46 @@ ERROR: bad-cuda-parameter parameter ; "CUDA Version: " write cuda-version number>string print nl #cuda-devices iota [ nl ] [ cuda-device. ] interleave ; +: c-type>cuda-setter ( c-type -- n cuda-type ) + { + { [ dup a:int = ] [ drop 4 [ cuda-int* ] ] } + { [ dup a:uint = ] [ drop 4 [ cuda-int* ] ] } + { [ dup a:float = ] [ drop 4 [ cuda-float* ] ] } + { [ dup a:pointer? ] [ drop 4 [ ptr>> cuda-int* ] ] } + { [ dup a:void* = ] [ drop 4 [ ptr>> cuda-int* ] ] } + } cond ; -: test-cuda0 ( -- ) - T{ launcher - { path "vocab:cuda/hello.ptx" } - { block-shape { 6 6 6 } } - { shared-size 2 } - { grid { 2 6 } } - } [ - "helloWorld" [ - "Hello World!" [ - ] map-index - malloc-device-string &dispose +: run-function-launcher ( function-launcher function -- ) + swap + { + [ dim-block>> first3 function-block-shape* ] + [ shared-size>> function-shared-size* ] + [ + dim-grid>> [ + launch-function* + ] [ + first2 launch-function-grid* + ] if-empty + ] + } 2cleave ; - [ 1array set-parameters ] - [ drop launch ] - [ device>host utf8 alien>string . ] tri - ] with-cuda-function - ] with-cuda ; +: cuda-argument-setter ( offset c-type -- offset' quot ) + c-type>cuda-setter + [ over [ + ] dip ] dip + '[ swap _ swap _ call ] ; + +MACRO: cuda-arguments ( c-types -- quot: ( args... function -- ) ) + [ 0 ] dip [ cuda-argument-setter ] map reverse + swap '[ _ param-size* ] suffix + '[ _ cleave ] ; + +: define-cuda-word ( word string arguments -- ) + [ + '[ + _ get-function-ptr + [ nip _ cuda-arguments ] + [ run-function-launcher ] 2bi + ] + ] + [ nip \ function-launcher suffix a:void function-effect ] + 2bi define-declared ; diff --git a/extra/cuda/demos/hello-world/authors.txt b/extra/cuda/demos/hello-world/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/extra/cuda/demos/hello-world/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/extra/cuda/demos/hello-world/hello-world.factor b/extra/cuda/demos/hello-world/hello-world.factor new file mode 100644 index 0000000000..6a598dda44 --- /dev/null +++ b/extra/cuda/demos/hello-world/hello-world.factor @@ -0,0 +1,30 @@ +! Copyright (C) 2010 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: alien.c-types alien.strings cuda cuda.syntax destructors +io.encodings.utf8 kernel locals math prettyprint sequences ; +IN: cuda.hello-world + +CUDA-LIBRARY: hello vocab:cuda/hello.ptx + +CUDA-FUNCTION: helloWorld ( char* string-ptr ) ; + +:: cuda-hello-world ( -- ) + T{ launcher + { device 0 } + { path "vocab:cuda/hello.ptx" } + } [ + "Hello World!" [ - ] map-index malloc-device-string &dispose dup :> str + + T{ function-launcher + { dim-block { 6 1 1 } } + { dim-grid { 2 1 } } + { shared-size 0 } + } + helloWorld + + ! <<< { 6 1 1 } { 2 1 } 1 >>> helloWorld + + str device>host utf8 alien>string . + ] with-cuda ; + +MAIN: cuda-hello-world diff --git a/extra/cuda/hello.cu b/extra/cuda/demos/hello-world/hello.cu similarity index 100% rename from extra/cuda/hello.cu rename to extra/cuda/demos/hello-world/hello.cu diff --git a/extra/cuda/hello.ptx b/extra/cuda/demos/hello-world/hello.ptx similarity index 100% rename from extra/cuda/hello.ptx rename to extra/cuda/demos/hello-world/hello.ptx diff --git a/extra/cuda/demos/prefix-sum/authors.txt b/extra/cuda/demos/prefix-sum/authors.txt new file mode 100644 index 0000000000..2d6d4567d3 --- /dev/null +++ b/extra/cuda/demos/prefix-sum/authors.txt @@ -0,0 +1,2 @@ +Doug Coleman +Joe Groff diff --git a/extra/cuda/prefix-sum.cu b/extra/cuda/demos/prefix-sum/prefix-sum.cu similarity index 100% rename from extra/cuda/prefix-sum.cu rename to extra/cuda/demos/prefix-sum/prefix-sum.cu diff --git a/extra/cuda/demos/prefix-sum/prefix-sum.factor b/extra/cuda/demos/prefix-sum/prefix-sum.factor new file mode 100644 index 0000000000..2cd8eba166 --- /dev/null +++ b/extra/cuda/demos/prefix-sum/prefix-sum.factor @@ -0,0 +1,21 @@ +! Copyright (C) 2010 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: alien.c-types cuda cuda.syntax locals ; +IN: cuda.demos.prefix-sum + +CUDA-LIBRARY: prefix-sum vocab:cuda/demos/prefix-sum/prefix-sum.ptx + +CUDA-FUNCTION: prefix_sum_block ( uint* in, uint* out, uint n ) ; + +:: cuda-prefix-sum ( -- ) + T{ launcher + { device 0 } + { path "vocab:cuda/demos/prefix-sum/prefix-sum.ptx" } + } [ + + + ! { 1 1 1 } { 2 1 } 0 3<<< prefix_sum_block + + ] with-cuda ; + +MAIN: cuda-prefix-sum diff --git a/extra/cuda/prefix-sum.ptx b/extra/cuda/demos/prefix-sum/prefix-sum.ptx similarity index 100% rename from extra/cuda/prefix-sum.ptx rename to extra/cuda/demos/prefix-sum/prefix-sum.ptx diff --git a/extra/cuda/syntax/authors.txt b/extra/cuda/syntax/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/extra/cuda/syntax/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/extra/cuda/syntax/syntax.factor b/extra/cuda/syntax/syntax.factor new file mode 100644 index 0000000000..b8df30f61c --- /dev/null +++ b/extra/cuda/syntax/syntax.factor @@ -0,0 +1,15 @@ +! Copyright (C) 2010 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: alien.parser cuda kernel lexer parser ; +IN: cuda.syntax + +SYNTAX: CUDA-LIBRARY: scan scan add-cuda-library ; + +SYNTAX: CUDA-FUNCTION: + scan [ create-in ] [ ] bi ";" scan-c-args drop define-cuda-word ; + +: 3<<< ( dim-block dim-grid shared-size -- function-launcher ) + f function-launcher boa ; + +: 4<<< ( dim-block dim-grid shared-size stream -- function-launcher ) + function-launcher boa ;