cuda.libraries: make grid tuple read-only, grid-dim/block-dim foldable, and CUDA-FUNCTION: words inline so that kernel call sites can optimize

db4
Joe Groff 2010-05-20 18:10:17 -07:00
parent 2ad382865e
commit ff748b5c9f
1 changed file with 25 additions and 8 deletions

View File

@ -38,7 +38,10 @@ SYMBOL: current-cuda-library
cuFuncSetSharedSize cuda-error ; inline
! Tuple with four slots: dim-grid, dim-block, shared-size and stream.
! In the braced slot specs below all four slots are read-only and
! shared-size has an initial value of 0.
! NOTE(review): this listing is a diff with its +/- markers stripped, so the
! bare slot-list line and the braced slot specs are presumably the old and new
! forms of one definition -- confirm against the real file.
TUPLE: grid
dim-grid dim-block shared-size stream ;
{ dim-grid read-only }
{ dim-block read-only }
{ shared-size read-only initial: 0 }
{ stream read-only } ;
! Builds a grid from dim-grid and dim-block, filling the shared-size slot
! with 0 and the stream slot with f (slots are assigned in declaration order
! by boa).
: <grid> ( dim-grid dim-block -- grid )
0 f grid boa ; inline
@ -50,10 +53,24 @@ dim-grid dim-block shared-size stream ;
grid boa ; inline
<PRIVATE
! Colon-definition form of block-dim: turns a block size into three values.
! A sequence is padded at the tail with 1s up to length 3 and its first three
! elements are output; any other input is kept as x and followed by 1 1.
! NOTE(review): a GENERIC: with the same name appears in this same listing;
! this is presumably the pre-commit version -- confirm against the real file.
: block-dim ( block -- x y z )
dup sequence? [ 3 1 pad-tail first3 ] [ 1 1 ] if ; inline
! Colon-definition form of grid-dim: turns a grid size into two values.
! A sequence is padded at the tail with 1s up to length 2 and its first two
! elements are output; any other input is kept as x and followed by 1.
! NOTE(review): a GENERIC: with the same name appears in this same listing;
! this is presumably the pre-commit version -- confirm against the real file.
: grid-dim ( block -- x y )
dup sequence? [ 2 1 pad-tail first2 ] [ 1 ] if ; inline
! Generic form of block-dim: converts a block size into exactly three values
! x y z. Declared foldable, and each method is declared inline.
!   integer  -- the integer is x; y and z are 1.
!   sequence -- dispatches on length:
!                 0 elements -> 1 1 1
!                 1 element  -> first element, then 1 1
!                 2 elements -> first two elements, then 1
!                 otherwise  -> first three elements (the default case branch
!                               drops the length that case leaves on the stack)
GENERIC: block-dim ( block-size -- x y z ) foldable
M: integer block-dim 1 1 ; inline
M: sequence block-dim
dup length {
{ 0 [ drop 1 1 1 ] }
{ 1 [ first 1 1 ] }
{ 2 [ first2 1 ] }
[ drop first3 ]
} case ; inline
! Generic form of grid-dim: converts a grid size into exactly two values x y.
! Declared foldable, and each method is declared inline.
!   integer  -- the integer is x; y is 1.
!   sequence -- dispatches on length:
!                 0 elements -> 1 1
!                 1 element  -> first element, then 1
!                 otherwise  -> first two elements (the default case branch
!                               drops the length that case leaves on the stack)
GENERIC: grid-dim ( grid-size -- x y ) foldable
M: integer grid-dim 1 ; inline
M: sequence grid-dim
dup length {
{ 0 [ drop 1 1 ] }
{ 1 [ first 1 ] }
[ drop first2 ]
} case ; inline
PRIVATE>
: load-module ( path -- module )
@ -89,7 +106,7 @@ ERROR: no-cuda-library name ;
[ grid-dim launch-function-grid ]
[ launch-function ] if*
]
} 2cleave ;
} 2cleave ; inline
<PRIVATE
: make-param-buffer ( function size -- buffer size )
@ -163,10 +180,10 @@ MACRO: cuda-invoke ( module-name function-name arguments -- )
! Defines `word` as a call to cuda-invoke for the given module, function and
! argument list. 3bi applies two quotations to the same three inputs
! (module-name function-name arguments) while `word` stays underneath:
!   1. builds the body quotation [ module-name function-name arguments cuda-invoke ]
!   2. builds the stack effect from the arguments with the `grid` word appended
!      and c:void passed to function-effect
! NOTE(review): two terminating lines (define-declared and define-inline) are
! present because this listing is a diff with its +/- markers stripped; per the
! commit message ("CUDA-FUNCTION: words inline") the define-inline form is
! presumably the current one -- confirm against the real file.
: define-cuda-function ( word module-name function-name arguments -- )
[ '[ _ _ _ cuda-invoke ] ]
[ 2nip \ grid suffix c:void function-effect ]
3bi define-declared ;
3bi define-inline ;
! Defines `word` with stack effect ( -- device-ptr ) whose body is
! [ module-name symbol-name cuda-global ].
! NOTE(review): two body lines (define-declared and define-inline) are present
! because this listing is a diff with its +/- markers stripped; the
! define-inline form is presumably the current one -- confirm against the
! real file.
: define-cuda-global ( word module-name symbol-name -- )
'[ _ _ cuda-global ] (( -- device-ptr )) define-declared ;
'[ _ _ cuda-global ] (( -- device-ptr )) define-inline ;
! Tuple with four slots: name, abi, path and handle.
TUPLE: cuda-library name abi path handle ;
! Error class with a single slot, abi; ERROR: also defines the bad-cuda-abi
! word that throws an instance built from the value on the stack.
ERROR: bad-cuda-abi abi ;