diff --git a/extra/cuda/cuda.factor b/extra/cuda/cuda.factor
index 1b144632fb..dd3f5b8f9e 100644
--- a/extra/cuda/cuda.factor
+++ b/extra/cuda/cuda.factor
@@ -7,7 +7,7 @@ destructors fry init io io.backend io.encodings.string
 io.encodings.utf8 kernel lexer locals macros math math.parser
 namespaces nested-comments opengl.gl.extensions parser
 prettyprint quotations sequences words cuda.libraries ;
-QUALIFIED-WITH: alien.c-types a
+QUALIFIED-WITH: alien.c-types c
 IN: cuda
 
 TUPLE: launcher
@@ -41,11 +41,11 @@ dim-grid dim-block shared-size stream ;
 
 : c-type>cuda-setter ( c-type -- n cuda-type )
     {
-        { [ dup a:int = ] [ drop 4 [ cuda-int* ] ] }
-        { [ dup a:uint = ] [ drop 4 [ cuda-int* ] ] }
-        { [ dup a:float = ] [ drop 4 [ cuda-float* ] ] }
-        { [ dup a:pointer? ] [ drop 4 [ cuda-int* ] ] }
-        { [ dup a:void* = ] [ drop 4 [ cuda-int* ] ] }
+        { [ dup c:int = ] [ drop 4 [ cuda-int* ] ] }
+        { [ dup c:uint = ] [ drop 4 [ cuda-int* ] ] }
+        { [ dup c:float = ] [ drop 4 [ cuda-float* ] ] }
+        { [ dup c:pointer? ] [ drop 4 [ cuda-int* ] ] }
+        { [ dup c:void* = ] [ drop 4 [ cuda-int* ] ] }
     } cond ;
 
 _ with-cuda ] each ; inline
 
 : cuda-device-properties ( n -- properties )
-    [ CUdevprop ] dip
-    [ cuDeviceGetProperties cuda-error ] 2keep drop
-    CUdevprop memory>struct ;
+    [ CUdevprop ] dip
+    [ cuDeviceGetProperties cuda-error ] 2keep drop ;
 
 : cuda-devices ( -- assoc )
     enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ;
@@ -68,3 +67,33 @@ IN: cuda.devices
     "CUDA Version: " write cuda-version number>string print nl
     #cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
 
+! Integer division rounding up: z = (x + y - 1) /i y.
+: up/i ( x y -- z )
+    [ 1 - + ] keep /i ; inline
+
+! Pick a one-dimensional launch shape for job-count jobs:
+! * the block size is capped by max-block-size, by job-count, and (when
+!   per-job-shared is nonzero) by max-shared-size /i per-job-shared;
+! * grid-size is the number of such blocks needed to cover job-count;
+! * block-size is then evened out as job-count spread over grid-size blocks;
+! * per-block-shared is block-size * per-job-shared.
+! NOTE(review): a zero job-count, or a per-job-shared larger than
+! max-shared-size, makes the capped block size zero and up/i then divides
+! by zero -- confirm callers never pass those.
+:: (distribute-jobs) ( job-count per-job-shared max-shared-size max-block-size
+                       -- grid-size block-size per-block-shared )
+    per-job-shared [ max-block-size ] [ max-shared-size swap /i max-block-size min ] if-zero
+        job-count min :> job-max-block-size
+    job-count job-max-block-size up/i :> grid-size
+    job-count grid-size up/i          :> block-size
+    block-size per-job-shared *       :> per-block-shared
+
+    grid-size block-size per-block-shared ; inline
+
+! (distribute-jobs) with the limits read from the current cuda-device's properties.
+: distribute-jobs ( job-count per-job-shared -- grid-size block-size per-block-shared )
+    cuda-device get cuda-device-properties
+    ! CUdevprop declares maxThreadsDim as a per-axis int[3]; (distribute-jobs)
+    ! needs a scalar block limit, so take the first (x-axis) element.
+    [ sharedMemPerBlock>> ] [ maxThreadsDim>> first ] bi
+    (distribute-jobs) ; inline
diff --git a/extra/cuda/utils/utils.factor b/extra/cuda/utils/utils.factor
index a85a0b35e9..269ebbe401 100644
--- a/extra/cuda/utils/utils.factor
+++ b/extra/cuda/utils/utils.factor
@@ -1,8 +1,8 @@
 ! Copyright (C) 2010 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors alien.c-types alien.data alien.strings arrays
-assocs byte-arrays classes.struct combinators cuda.ffi io
-io.backend io.encodings.utf8 kernel math.parser namespaces
+assocs byte-arrays classes.struct combinators cuda.devices cuda.ffi
+io io.backend io.encodings.utf8 kernel math.parser namespaces
 prettyprint sequences ;
 IN: cuda.utils
 
@@ -92,5 +92,3 @@ ERROR: throw-cuda-error n ;
 : function-shared-size ( n -- )
     [ cuda-function get ] dip
     cuFuncSetSharedSize cuda-error ;
-
-: distribute-jobs ( job-count per-job-shared -- grid-size block-size per-block-shared )