cuda.devices: "distribute-jobs" utility word that determines grid and block size for a job based on per-thread shared memory requirements and max block size
							parent
							
								
									097100cb12
								
							
						
					
					
						commit
						f26a5836e6
					
				|  | @ -7,7 +7,7 @@ destructors fry init io io.backend io.encodings.string | ||||||
| io.encodings.utf8 kernel lexer locals macros math math.parser | io.encodings.utf8 kernel lexer locals macros math math.parser | ||||||
| namespaces nested-comments opengl.gl.extensions parser | namespaces nested-comments opengl.gl.extensions parser | ||||||
| prettyprint quotations sequences words cuda.libraries ; | prettyprint quotations sequences words cuda.libraries ; | ||||||
| QUALIFIED-WITH: alien.c-types a | QUALIFIED-WITH: alien.c-types c | ||||||
| IN: cuda | IN: cuda | ||||||
| 
 | 
 | ||||||
| TUPLE: launcher | TUPLE: launcher | ||||||
|  | @ -41,11 +41,11 @@ dim-grid dim-block shared-size stream ; | ||||||
| 
 | 
 | ||||||
| : c-type>cuda-setter ( c-type -- n cuda-type ) | : c-type>cuda-setter ( c-type -- n cuda-type ) | ||||||
|     { |     { | ||||||
|         { [ dup a:int = ] [ drop 4 [ cuda-int* ] ] } |         { [ dup c:int = ] [ drop 4 [ cuda-int* ] ] } | ||||||
|         { [ dup a:uint = ] [ drop 4 [ cuda-int* ] ] } |         { [ dup c:uint = ] [ drop 4 [ cuda-int* ] ] } | ||||||
|         { [ dup a:float = ] [ drop 4 [ cuda-float* ] ] } |         { [ dup c:float = ] [ drop 4 [ cuda-float* ] ] } | ||||||
|         { [ dup a:pointer? ] [ drop 4 [ cuda-int* ] ] } |         { [ dup c:pointer? ] [ drop 4 [ cuda-int* ] ] } | ||||||
|         { [ dup a:void* = ] [ drop 4 [ cuda-int* ] ] } |         { [ dup c:void* = ] [ drop 4 [ cuda-int* ] ] } | ||||||
|     } cond ; |     } cond ; | ||||||
| 
 | 
 | ||||||
| <PRIVATE | <PRIVATE | ||||||
|  | @ -85,5 +85,5 @@ MACRO: cuda-arguments ( c-types -- quot: ( args... function -- ) ) | ||||||
|             [ run-function-launcher ] 2bi |             [ run-function-launcher ] 2bi | ||||||
|         ] |         ] | ||||||
|     ] |     ] | ||||||
|     [ 2nip \ function-launcher suffix a:void function-effect ] |     [ 2nip \ function-launcher suffix c:void function-effect ] | ||||||
|     3bi define-declared ; |     3bi define-declared ; | ||||||
|  |  | ||||||
|  | @ -0,0 +1,13 @@ | ||||||
|  | ! (c)2010 Joe Groff bsd license | ||||||
|  | USING: cuda.devices tools.test ; | ||||||
|  | IN: cuda.devices.tests | ||||||
|  | 
 | ||||||
|  | [ 1  5 100 ] [  5 20 100 10 (distribute-jobs) ] unit-test  | ||||||
|  | [ 2  5 100 ] [ 10 20 100 10 (distribute-jobs) ] unit-test  | ||||||
|  | [ 2  5 100 ] [ 10 20 200  5 (distribute-jobs) ] unit-test  | ||||||
|  | [ 2  5 100 ] [ 10 20 300  6 (distribute-jobs) ] unit-test  | ||||||
|  | [ 2  6 120 ] [ 11 20 300  6 (distribute-jobs) ] unit-test  | ||||||
|  | [ 1 10 200 ] [ 10 20 200 10 (distribute-jobs) ] unit-test  | ||||||
|  | [ 1 10   0 ] [ 10  0 200 10 (distribute-jobs) ] unit-test  | ||||||
|  | [ 2  5   0 ] [ 10  0 200  9 (distribute-jobs) ] unit-test  | ||||||
|  | 
 | ||||||
|  | @ -1,9 +1,9 @@ | ||||||
| ! Copyright (C) 2010 Doug Coleman. | ! Copyright (C) 2010 Doug Coleman. | ||||||
| ! See http://factorcode.org/license.txt for BSD license. | ! See http://factorcode.org/license.txt for BSD license. | ||||||
| USING: alien.c-types alien.data alien.strings arrays assocs | USING: accessors alien.c-types alien.data alien.strings arrays | ||||||
| byte-arrays classes.struct combinators cuda cuda.ffi cuda.utils | assocs byte-arrays classes.struct combinators cuda cuda.ffi | ||||||
| fry io io.encodings.utf8 kernel math.parser prettyprint | cuda.utils fry io io.encodings.utf8 kernel locals math | ||||||
| sequences ; | math.order math.parser namespaces prettyprint sequences ; | ||||||
| IN: cuda.devices | IN: cuda.devices | ||||||
| 
 | 
 | ||||||
| : #cuda-devices ( -- n ) | : #cuda-devices ( -- n ) | ||||||
|  | @ -19,9 +19,8 @@ IN: cuda.devices | ||||||
|     [ enumerate-cuda-devices ] dip '[ <launcher> _ with-cuda ] each ; inline |     [ enumerate-cuda-devices ] dip '[ <launcher> _ with-cuda ] each ; inline | ||||||
| 
 | 
 | ||||||
| : cuda-device-properties ( n -- properties ) | : cuda-device-properties ( n -- properties ) | ||||||
|     [ CUdevprop <c-object> ] dip |     [ CUdevprop <struct> ] dip | ||||||
|     [ cuDeviceGetProperties cuda-error ] 2keep drop |     [ cuDeviceGetProperties cuda-error ] 2keep drop ; | ||||||
|     CUdevprop memory>struct ; |  | ||||||
| 
 | 
 | ||||||
| : cuda-devices ( -- assoc ) | : cuda-devices ( -- assoc ) | ||||||
|     enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ; |     enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ; | ||||||
|  | @ -68,3 +67,20 @@ IN: cuda.devices | ||||||
|     "CUDA Version: " write cuda-version number>string print nl |     "CUDA Version: " write cuda-version number>string print nl | ||||||
|     #cuda-devices iota [ nl ] [ cuda-device. ] interleave ; |     #cuda-devices iota [ nl ] [ cuda-device. ] interleave ; | ||||||
| 
 | 
 | ||||||
! Integer division that rounds up: adds y-1 to x and then divides by y
! with /i. For non-negative x and positive y the result is the ceiling
! of x/y. A zero y is passed straight to /i as the divisor.
:: up/i ( x y -- z )
    x y 1 - + y /i ; inline
|  | 
 | ||||||
! Chooses a grid size and block size for job-count jobs.
!   job-count        number of jobs to distribute
!   per-job-shared   shared memory required by each job (0 = none)
!   max-shared-size  shared memory available to a single block
!   max-block-size   largest block size allowed
! Leaves on the stack: the number of blocks, the number of jobs per
! block, and the shared memory one block needs (jobs per block times
! per-job-shared).
! NOTE(review): job-count of 0, or per-job-shared greater than
! max-shared-size, makes the block limit 0 and so reaches up/i with a
! zero divisor; callers are presumably expected to avoid both.
:: (distribute-jobs) ( job-count per-job-shared max-shared-size max-block-size
                       -- grid-size block-size per-block-shared )
    ! Block-size ceiling: when jobs need shared memory, only as many
    ! jobs as fit in max-shared-size, capped at max-block-size.
    per-job-shared zero?
    [ max-block-size ]
    [ max-shared-size per-job-shared /i max-block-size min ] if
    ! A block never needs to be larger than the number of jobs.
    job-count min :> block-limit
    ! Blocks required at that ceiling, rounded up.
    job-count block-limit up/i :> blocks
    ! Spread the jobs evenly over those blocks, rounded up.
    job-count blocks up/i :> jobs-per-block

    blocks jobs-per-block dup per-job-shared * ; inline
|  | 
 | ||||||
! Computes grid-size, block-size and per-block-shared for job-count jobs
! on the device stored in the cuda-device variable. That device's
! properties supply the shared-memory and block-size limits handed to
! (distribute-jobs).
: distribute-jobs ( job-count per-job-shared -- grid-size block-size per-block-shared )
    cuda-device get cuda-device-properties
    ! maxThreadsPerBlock is the scalar thread limit for one block.
    ! maxThreadsDim is the three-element per-dimension limit array and
    ! cannot be used as the max-block-size number that (distribute-jobs)
    ! feeds to min.
    [ sharedMemPerBlock>> ] [ maxThreadsPerBlock>> ] bi
    (distribute-jobs) ; inline
|  |  | ||||||
|  | @ -1,8 +1,8 @@ | ||||||
| ! Copyright (C) 2010 Doug Coleman. | ! Copyright (C) 2010 Doug Coleman. | ||||||
| ! See http://factorcode.org/license.txt for BSD license. | ! See http://factorcode.org/license.txt for BSD license. | ||||||
| USING: accessors alien.c-types alien.data alien.strings arrays | USING: accessors alien.c-types alien.data alien.strings arrays | ||||||
| assocs byte-arrays classes.struct combinators cuda.ffi io | assocs byte-arrays classes.struct combinators cuda.devices cuda.ffi | ||||||
| io.backend io.encodings.utf8 kernel math.parser namespaces | io io.backend io.encodings.utf8 kernel math.parser namespaces | ||||||
| prettyprint sequences ; | prettyprint sequences ; | ||||||
| IN: cuda.utils | IN: cuda.utils | ||||||
| 
 | 
 | ||||||
|  | @ -92,5 +92,3 @@ ERROR: throw-cuda-error n ; | ||||||
| : function-shared-size ( n -- ) | : function-shared-size ( n -- ) | ||||||
|     [ cuda-function get ] dip |     [ cuda-function get ] dip | ||||||
|     cuFuncSetSharedSize cuda-error ; |     cuFuncSetSharedSize cuda-error ; | ||||||
| 
 |  | ||||||
| : distribute-jobs ( job-count per-job-shared -- grid-size block-size per-block-shared ) |  | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue