cuda.devices: "distribute-jobs" utility word that determines grid and block size for a job based on per-thread shared memory requirements and max block size
parent
097100cb12
commit
f26a5836e6
|
@ -7,7 +7,7 @@ destructors fry init io io.backend io.encodings.string
|
||||||
io.encodings.utf8 kernel lexer locals macros math math.parser
|
io.encodings.utf8 kernel lexer locals macros math math.parser
|
||||||
namespaces nested-comments opengl.gl.extensions parser
|
namespaces nested-comments opengl.gl.extensions parser
|
||||||
prettyprint quotations sequences words cuda.libraries ;
|
prettyprint quotations sequences words cuda.libraries ;
|
||||||
QUALIFIED-WITH: alien.c-types a
|
QUALIFIED-WITH: alien.c-types c
|
||||||
IN: cuda
|
IN: cuda
|
||||||
|
|
||||||
TUPLE: launcher
|
TUPLE: launcher
|
||||||
|
@ -41,11 +41,11 @@ dim-grid dim-block shared-size stream ;
|
||||||
|
|
||||||
: c-type>cuda-setter ( c-type -- n cuda-type )
|
: c-type>cuda-setter ( c-type -- n cuda-type )
|
||||||
{
|
{
|
||||||
{ [ dup a:int = ] [ drop 4 [ cuda-int* ] ] }
|
{ [ dup c:int = ] [ drop 4 [ cuda-int* ] ] }
|
||||||
{ [ dup a:uint = ] [ drop 4 [ cuda-int* ] ] }
|
{ [ dup c:uint = ] [ drop 4 [ cuda-int* ] ] }
|
||||||
{ [ dup a:float = ] [ drop 4 [ cuda-float* ] ] }
|
{ [ dup c:float = ] [ drop 4 [ cuda-float* ] ] }
|
||||||
{ [ dup a:pointer? ] [ drop 4 [ cuda-int* ] ] }
|
{ [ dup c:pointer? ] [ drop 4 [ cuda-int* ] ] }
|
||||||
{ [ dup a:void* = ] [ drop 4 [ cuda-int* ] ] }
|
{ [ dup c:void* = ] [ drop 4 [ cuda-int* ] ] }
|
||||||
} cond ;
|
} cond ;
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
@ -85,5 +85,5 @@ MACRO: cuda-arguments ( c-types -- quot: ( args... function -- ) )
|
||||||
[ run-function-launcher ] 2bi
|
[ run-function-launcher ] 2bi
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
[ 2nip \ function-launcher suffix a:void function-effect ]
|
[ 2nip \ function-launcher suffix c:void function-effect ]
|
||||||
3bi define-declared ;
|
3bi define-declared ;
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
! (c)2010 Joe Groff bsd license
|
||||||
|
USING: cuda.devices tools.test ;
|
||||||
|
IN: cuda.devices.tests
|
||||||
|
|
||||||
|
! Inputs are: job-count per-job-shared max-shared-size max-block-size.
! Expected outputs are: grid-size block-size per-block-shared.
! Shared memory allows 100/20 = 5 jobs per block; all 5 jobs fit in one block.
[ 1 5 100 ] [ 5 20 100 10 (distribute-jobs) ] unit-test
|
||||||
|
! Shared memory still limits a block to 5 jobs, so 10 jobs need two blocks.
[ 2 5 100 ] [ 10 20 100 10 (distribute-jobs) ] unit-test
|
||||||
|
! Shared memory would allow 10 jobs per block, but max-block-size caps it at 5.
[ 2 5 100 ] [ 10 20 200 5 (distribute-jobs) ] unit-test
|
||||||
|
! Cap is 6 per block; the 10 jobs are spread evenly as 5 + 5 rather than 6 + 4.
[ 2 5 100 ] [ 10 20 300 6 (distribute-jobs) ] unit-test
|
||||||
|
! 11 jobs with a cap of 6: two blocks of 6 (one slot unused), 6 * 20 = 120 bytes.
[ 2 6 120 ] [ 11 20 300 6 (distribute-jobs) ] unit-test
|
||||||
|
! Both limits allow exactly 10 jobs per block, so one block holds everything.
[ 1 10 200 ] [ 10 20 200 10 (distribute-jobs) ] unit-test
|
||||||
|
! Zero per-job shared memory: only max-block-size limits the block size.
[ 1 10 0 ] [ 10 0 200 10 (distribute-jobs) ] unit-test
|
||||||
|
! Zero per-job shared memory with a cap of 9: two evenly sized blocks of 5.
[ 2 5 0 ] [ 10 0 200 9 (distribute-jobs) ] unit-test
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
! Copyright (C) 2010 Doug Coleman.
|
! Copyright (C) 2010 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: alien.c-types alien.data alien.strings arrays assocs
|
USING: accessors alien.c-types alien.data alien.strings arrays
|
||||||
byte-arrays classes.struct combinators cuda cuda.ffi cuda.utils
|
assocs byte-arrays classes.struct combinators cuda cuda.ffi
|
||||||
fry io io.encodings.utf8 kernel math.parser prettyprint
|
cuda.utils fry io io.encodings.utf8 kernel locals math
|
||||||
sequences ;
|
math.order math.parser namespaces prettyprint sequences ;
|
||||||
IN: cuda.devices
|
IN: cuda.devices
|
||||||
|
|
||||||
: #cuda-devices ( -- n )
|
: #cuda-devices ( -- n )
|
||||||
|
@ -19,9 +19,8 @@ IN: cuda.devices
|
||||||
[ enumerate-cuda-devices ] dip '[ <launcher> _ with-cuda ] each ; inline
|
[ enumerate-cuda-devices ] dip '[ <launcher> _ with-cuda ] each ; inline
|
||||||
|
|
||||||
: cuda-device-properties ( n -- properties )
|
: cuda-device-properties ( n -- properties )
|
||||||
[ CUdevprop <c-object> ] dip
|
[ CUdevprop <struct> ] dip
|
||||||
[ cuDeviceGetProperties cuda-error ] 2keep drop
|
[ cuDeviceGetProperties cuda-error ] 2keep drop ;
|
||||||
CUdevprop memory>struct ;
|
|
||||||
|
|
||||||
: cuda-devices ( -- assoc )
|
: cuda-devices ( -- assoc )
|
||||||
enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ;
|
enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ;
|
||||||
|
@ -68,3 +67,20 @@ IN: cuda.devices
|
||||||
"CUDA Version: " write cuda-version number>string print nl
|
"CUDA Version: " write cuda-version number>string print nl
|
||||||
#cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
|
#cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
|
||||||
|
|
||||||
|
! Integer division of x by y with the quotient rounded up instead of
! truncated, computed as (x + y - 1) /i y.
:: up/i ( x y -- z )
    x y 1 - + y /i ; inline
|
||||||
|
|
||||||
|
! Splits job-count jobs into equally sized blocks.
!
! Inputs:
!   job-count       - total number of jobs to distribute
!   per-job-shared  - shared memory needed by one job (0 means none)
!   max-shared-size - shared memory available to one block
!   max-block-size  - upper bound on the number of jobs in one block
! Outputs:
!   grid-size        - number of blocks
!   block-size       - jobs per block
!   per-block-shared - block-size * per-job-shared
!
! NOTE(review): when job-count is 0, or per-job-shared exceeds
! max-shared-size, the block-size cap below becomes 0 and up/i is called
! with a zero divisor. Callers presumably never pass such values; confirm.
:: (distribute-jobs) ( job-count per-job-shared max-shared-size max-block-size
                       -- grid-size block-size per-block-shared )
    ! Largest block the limits allow: shared memory only constrains the
    ! block when each job actually uses some.
    per-job-shared zero?
    [ max-block-size ]
    [ max-shared-size per-job-shared /i max-block-size min ] if
    ! Never make a block larger than the number of jobs.
    job-count min :> threads-cap
    ! Fewest blocks that can hold every job at that cap...
    job-count threads-cap up/i :> blocks
    ! ...then even the jobs out across those blocks.
    job-count blocks up/i :> threads

    blocks threads threads per-job-shared * ; inline
|
||||||
|
|
||||||
|
! Computes grid size, block size and per-block shared memory size for
! job-count jobs that each need per-job-shared units of shared memory.
! The limits are read from the properties of the device stored in the
! cuda-device variable, then handed to (distribute-jobs).
: distribute-jobs ( job-count per-job-shared -- grid-size block-size per-block-shared )
    cuda-device get cuda-device-properties
    ! max-block-size must be a single number: use maxThreadsPerBlock.
    ! maxThreadsDim is the per-axis (x, y, z) limit array in CUdevprop and
    ! cannot be fed to the min / integer division in (distribute-jobs).
    [ sharedMemPerBlock>> ] [ maxThreadsPerBlock>> ] bi
    (distribute-jobs) ; inline
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
! Copyright (C) 2010 Doug Coleman.
|
! Copyright (C) 2010 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors alien.c-types alien.data alien.strings arrays
|
USING: accessors alien.c-types alien.data alien.strings arrays
|
||||||
assocs byte-arrays classes.struct combinators cuda.ffi io
|
assocs byte-arrays classes.struct combinators cuda.devices cuda.ffi
|
||||||
io.backend io.encodings.utf8 kernel math.parser namespaces
|
io io.backend io.encodings.utf8 kernel math.parser namespaces
|
||||||
prettyprint sequences ;
|
prettyprint sequences ;
|
||||||
IN: cuda.utils
|
IN: cuda.utils
|
||||||
|
|
||||||
|
@ -92,5 +92,3 @@ ERROR: throw-cuda-error n ;
|
||||||
: function-shared-size ( n -- )
|
: function-shared-size ( n -- )
|
||||||
[ cuda-function get ] dip
|
[ cuda-function get ] dip
|
||||||
cuFuncSetSharedSize cuda-error ;
|
cuFuncSetSharedSize cuda-error ;
|
||||||
|
|
||||||
: distribute-jobs ( job-count per-job-shared -- grid-size block-size per-block-shared )
|
|
||||||
|
|
Loading…
Reference in New Issue