Split up cuda vocab some more, make CUDA-LIBRARY: work
							parent
							
								
									2517b2fc2b
								
							
						
					
					
						commit
						1624903ae1
					
				| 
						 | 
				
			
			@ -2,324 +2,42 @@
 | 
			
		|||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: accessors alien alien.data alien.parser alien.strings
 | 
			
		||||
alien.syntax arrays assocs byte-arrays classes.struct
 | 
			
		||||
combinators continuations cuda.ffi destructors fry io
 | 
			
		||||
io.backend io.encodings.string io.encodings.utf8 kernel lexer
 | 
			
		||||
locals macros math math.parser namespaces nested-comments
 | 
			
		||||
opengl.gl.extensions parser prettyprint quotations sequences
 | 
			
		||||
words ;
 | 
			
		||||
combinators continuations cuda.ffi cuda.memory cuda.utils
 | 
			
		||||
destructors fry io io.backend io.encodings.string
 | 
			
		||||
io.encodings.utf8 kernel lexer locals macros math math.parser
 | 
			
		||||
namespaces nested-comments opengl.gl.extensions parser
 | 
			
		||||
prettyprint quotations sequences words ;
 | 
			
		||||
QUALIFIED-WITH: alien.c-types a
 | 
			
		||||
IN: cuda
 | 
			
		||||
 | 
			
		||||
SYMBOL: cuda-device
 | 
			
		||||
SYMBOL: cuda-context
 | 
			
		||||
SYMBOL: cuda-module
 | 
			
		||||
SYMBOL: cuda-function
 | 
			
		||||
SYMBOL: cuda-launcher
 | 
			
		||||
SYMBOL: cuda-memory-hashtable
 | 
			
		||||
 | 
			
		||||
SYMBOL: cuda-libraries
 | 
			
		||||
cuda-libraries [ H{ } clone ] initialize
 | 
			
		||||
 | 
			
		||||
SYMBOL: cuda-functions
 | 
			
		||||
 | 
			
		||||
TUPLE: cuda-library name path ;
 | 
			
		||||
 | 
			
		||||
: <cuda-library> ( name path -- obj )
 | 
			
		||||
    \ cuda-library new
 | 
			
		||||
        swap >>path
 | 
			
		||||
        swap >>name ;
 | 
			
		||||
 | 
			
		||||
: add-cuda-library ( name path -- )
 | 
			
		||||
    normalize-path <cuda-library>
 | 
			
		||||
    dup name>> cuda-libraries get set-at ;
 | 
			
		||||
 | 
			
		||||
: cuda-library ( name -- cuda-library )
 | 
			
		||||
    cuda-libraries get at ;
 | 
			
		||||
 | 
			
		||||
ERROR: throw-cuda-error n ;
 | 
			
		||||
 | 
			
		||||
: cuda-error ( n -- )
 | 
			
		||||
    dup CUDA_SUCCESS = [ drop ] [ throw-cuda-error ] if ;
 | 
			
		||||
 | 
			
		||||
: cuda-version ( -- n )
 | 
			
		||||
    a:int <c-object> [ cuDriverGetVersion cuda-error ] keep a:*int ;
 | 
			
		||||
 | 
			
		||||
: init-cuda ( -- )
 | 
			
		||||
    0 cuInit cuda-error ;
 | 
			
		||||
 | 
			
		||||
TUPLE: launcher
 | 
			
		||||
{ device integer initial: 0 }
 | 
			
		||||
{ device-flags initial: 0 }
 | 
			
		||||
path ;
 | 
			
		||||
{ device-flags initial: 0 } ;
 | 
			
		||||
 | 
			
		||||
TUPLE: function-launcher
 | 
			
		||||
dim-block
 | 
			
		||||
dim-grid
 | 
			
		||||
shared-size
 | 
			
		||||
stream ;
 | 
			
		||||
dim-block dim-grid shared-size stream ;
 | 
			
		||||
 | 
			
		||||
: with-cuda-context ( flags device quot -- )
 | 
			
		||||
    H{ } clone cuda-modules set-global
 | 
			
		||||
    H{ } clone cuda-functions set
 | 
			
		||||
    [
 | 
			
		||||
        [ CUcontext <c-object> ] 2dip
 | 
			
		||||
        [ cuCtxCreate cuda-error ] 3keep 2drop a:*void*
 | 
			
		||||
    ] dip 
 | 
			
		||||
    [ create-context ] dip 
 | 
			
		||||
    [ '[ _ @ ] ]
 | 
			
		||||
    [ drop '[ _ cuCtxDestroy cuda-error ] ] 2bi
 | 
			
		||||
    [ drop '[ _ destroy-context ] ] 2bi
 | 
			
		||||
    [ ] cleanup ; inline
 | 
			
		||||
 | 
			
		||||
: with-cuda-module ( path quot -- )
 | 
			
		||||
    [
 | 
			
		||||
        normalize-path
 | 
			
		||||
        [ CUmodule <c-object> ] dip
 | 
			
		||||
        [ cuModuleLoad cuda-error ] 2keep drop a:*void*
 | 
			
		||||
    ] dip
 | 
			
		||||
    [ '[ _ @ ] ]
 | 
			
		||||
    [ drop '[ _ cuModuleUnload cuda-error ] ] 2bi
 | 
			
		||||
    [ ] cleanup ; inline
 | 
			
		||||
 | 
			
		||||
: with-cuda-program ( flags device path quot -- )
 | 
			
		||||
: with-cuda-program ( flags device quot -- )
 | 
			
		||||
    [ dup cuda-device set ] 2dip
 | 
			
		||||
    '[
 | 
			
		||||
        cuda-context set
 | 
			
		||||
        _ [
 | 
			
		||||
            cuda-module set
 | 
			
		||||
            _ call
 | 
			
		||||
        ] with-cuda-module
 | 
			
		||||
    ] with-cuda-context ; inline
 | 
			
		||||
    '[ cuda-context set _ call ] with-cuda-context ; inline
 | 
			
		||||
 | 
			
		||||
: with-cuda ( launcher quot -- )
 | 
			
		||||
    [
 | 
			
		||||
        init-cuda
 | 
			
		||||
        H{ } clone cuda-memory-hashtable
 | 
			
		||||
    ] 2dip '[
 | 
			
		||||
    init-cuda
 | 
			
		||||
    [ H{ } clone cuda-memory-hashtable ] 2dip '[
 | 
			
		||||
        _ 
 | 
			
		||||
        [ cuda-launcher set ]
 | 
			
		||||
        [ [ device>> ] [ device-flags>> ] [ path>> ] tri ] bi
 | 
			
		||||
        [ [ device>> ] [ device-flags>> ] bi ] bi
 | 
			
		||||
        _ with-cuda-program
 | 
			
		||||
    ] with-variable ; inline
 | 
			
		||||
 | 
			
		||||
<PRIVATE
 | 
			
		||||
 | 
			
		||||
: #cuda-devices ( -- n )
 | 
			
		||||
    a:int <c-object> [ cuDeviceGetCount cuda-error ] keep a:*int ;
 | 
			
		||||
 | 
			
		||||
: n>cuda-device ( n -- device )
 | 
			
		||||
    [ CUdevice <c-object> ] dip [ cuDeviceGet cuda-error ] 2keep drop a:*int ;
 | 
			
		||||
 | 
			
		||||
: enumerate-cuda-devices ( -- devices )
 | 
			
		||||
    #cuda-devices iota [ n>cuda-device ] map ;
 | 
			
		||||
 | 
			
		||||
: cuda-device-properties ( device -- properties )
 | 
			
		||||
    [ CUdevprop <c-object> ] dip
 | 
			
		||||
    [ cuDeviceGetProperties cuda-error ] 2keep drop
 | 
			
		||||
    CUdevprop memory>struct ;
 | 
			
		||||
 | 
			
		||||
PRIVATE>
 | 
			
		||||
 | 
			
		||||
: cuda-devices ( -- assoc )
 | 
			
		||||
    enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ;
 | 
			
		||||
 | 
			
		||||
: cuda-device-name ( n -- string )
 | 
			
		||||
    [ 256 [ <byte-array> ] keep ] dip
 | 
			
		||||
    [ cuDeviceGetName cuda-error ]
 | 
			
		||||
    [ 2drop utf8 alien>string ] 3bi ;
 | 
			
		||||
 | 
			
		||||
: cuda-device-capability ( n -- pair )
 | 
			
		||||
    [ a:int <c-object> a:int <c-object> ] dip
 | 
			
		||||
    [ cuDeviceComputeCapability cuda-error ]
 | 
			
		||||
    [ drop [ a:*int ] bi@ ] 3bi 2array ;
 | 
			
		||||
 | 
			
		||||
: cuda-device-memory ( n -- bytes )
 | 
			
		||||
    [ a:uint <c-object> ] dip
 | 
			
		||||
    [ cuDeviceTotalMem cuda-error ]
 | 
			
		||||
    [ drop a:*uint ] 2bi ;
 | 
			
		||||
 | 
			
		||||
: get-function-ptr* ( module string -- function )
 | 
			
		||||
    [ CUfunction <c-object> ] 2dip
 | 
			
		||||
    [ cuModuleGetFunction cuda-error ] 3keep 2drop a:*void* ;
 | 
			
		||||
 | 
			
		||||
: get-function-ptr ( string -- function )
 | 
			
		||||
    [ cuda-module get ] dip get-function-ptr* ;
 | 
			
		||||
 | 
			
		||||
: with-cuda-function ( string quot -- )
 | 
			
		||||
    [
 | 
			
		||||
        get-function-ptr* cuda-function set
 | 
			
		||||
    ] dip call ; inline
 | 
			
		||||
 | 
			
		||||
: cached-cuda-function ( string -- alien )
 | 
			
		||||
    cuda-functions get [ get-function-ptr ] cache ;
 | 
			
		||||
 | 
			
		||||
: launch-function* ( function -- ) cuLaunch cuda-error ;
 | 
			
		||||
 | 
			
		||||
: launch-function ( -- ) cuda-function get cuLaunch cuda-error ;
 | 
			
		||||
 | 
			
		||||
: launch-function-grid* ( function width height -- )
 | 
			
		||||
    cuLaunchGrid cuda-error ;
 | 
			
		||||
 | 
			
		||||
: launch-function-grid ( width height -- )
 | 
			
		||||
    [ cuda-function get ] 2dip
 | 
			
		||||
    cuLaunchGrid cuda-error ;
 | 
			
		||||
 | 
			
		||||
TUPLE: cuda-memory < disposable ptr length ;
 | 
			
		||||
 | 
			
		||||
: <cuda-memory> ( ptr length -- obj )
 | 
			
		||||
    cuda-memory new-disposable
 | 
			
		||||
        swap >>length
 | 
			
		||||
        swap >>ptr ;
 | 
			
		||||
 | 
			
		||||
: add-cuda-memory ( obj -- obj )
 | 
			
		||||
    dup dup ptr>> cuda-memory-hashtable get set-at ;
 | 
			
		||||
 | 
			
		||||
: delete-cuda-memory ( obj -- )
 | 
			
		||||
    cuda-memory-hashtable delete-at ;
 | 
			
		||||
 | 
			
		||||
ERROR: invalid-cuda-memory ptr ;
 | 
			
		||||
 | 
			
		||||
: cuda-memory-length ( cuda-memory -- n )
 | 
			
		||||
    ptr>> cuda-memory-hashtable get ?at [
 | 
			
		||||
        length>>
 | 
			
		||||
    ] [
 | 
			
		||||
        invalid-cuda-memory
 | 
			
		||||
    ] if ;
 | 
			
		||||
 | 
			
		||||
M: cuda-memory byte-length length>> ;
 | 
			
		||||
 | 
			
		||||
: cuda-malloc ( n -- ptr )
 | 
			
		||||
    [ CUdeviceptr <c-object> ] dip
 | 
			
		||||
    [ cuMemAlloc cuda-error ] 2keep
 | 
			
		||||
    [ a:*int ] dip <cuda-memory> add-cuda-memory ;
 | 
			
		||||
 | 
			
		||||
: cuda-free* ( ptr -- )
 | 
			
		||||
    cuMemFree cuda-error ;
 | 
			
		||||
 | 
			
		||||
M: cuda-memory dispose ( ptr -- )
 | 
			
		||||
    ptr>> cuda-free* ;
 | 
			
		||||
 | 
			
		||||
: host>device ( dest-ptr src-ptr -- )
 | 
			
		||||
    [ ptr>> ] dip dup length cuMemcpyHtoD cuda-error ;
 | 
			
		||||
 | 
			
		||||
:: device>host ( ptr -- seq )
 | 
			
		||||
    ptr byte-length <byte-array>
 | 
			
		||||
    [ ptr [ ptr>> ] [ byte-length ] bi cuMemcpyDtoH cuda-error ] keep ;
 | 
			
		||||
 | 
			
		||||
: memcpy-device>device ( dest-ptr src-ptr count -- )
 | 
			
		||||
    cuMemcpyDtoD cuda-error ;
 | 
			
		||||
 | 
			
		||||
: memcpy-device>array ( dest-array dest-index src-ptr count -- )
 | 
			
		||||
    cuMemcpyDtoA cuda-error ;
 | 
			
		||||
 | 
			
		||||
: memcpy-array>device ( dest-ptr src-array src-index count -- )
 | 
			
		||||
    cuMemcpyAtoD cuda-error ;
 | 
			
		||||
 | 
			
		||||
: memcpy-array>host ( dest-ptr src-array src-index count -- )
 | 
			
		||||
    cuMemcpyAtoH cuda-error ;
 | 
			
		||||
 | 
			
		||||
: memcpy-host>array ( dest-array dest-index src-ptr count -- )
 | 
			
		||||
    cuMemcpyHtoA cuda-error ;
 | 
			
		||||
 | 
			
		||||
: memcpy-array>array ( dest-array dest-index src-array src-ptr count -- )
 | 
			
		||||
    cuMemcpyAtoA cuda-error ;
 | 
			
		||||
 | 
			
		||||
: cuda-int* ( function offset value -- )
 | 
			
		||||
    cuParamSeti cuda-error ;
 | 
			
		||||
 | 
			
		||||
: cuda-int ( offset value -- )
 | 
			
		||||
    [ cuda-function get ] 2dip cuda-int* ;
 | 
			
		||||
 | 
			
		||||
: cuda-float* ( function offset value -- )
 | 
			
		||||
    cuParamSetf cuda-error ;
 | 
			
		||||
 | 
			
		||||
: cuda-float ( offset value -- )
 | 
			
		||||
    [ cuda-function get ] 2dip cuda-float* ;
 | 
			
		||||
 | 
			
		||||
: cuda-vector* ( function offset ptr n -- )
 | 
			
		||||
    cuParamSetv cuda-error ;
 | 
			
		||||
 | 
			
		||||
: cuda-vector ( offset ptr n -- )
 | 
			
		||||
    [ cuda-function get ] 3dip cuda-vector* ;
 | 
			
		||||
 | 
			
		||||
: param-size* ( function n -- )
 | 
			
		||||
    cuParamSetSize cuda-error ;
 | 
			
		||||
 | 
			
		||||
: param-size ( n -- )
 | 
			
		||||
    [ cuda-function get ] dip param-size* ;
 | 
			
		||||
 | 
			
		||||
: malloc-device-string ( string -- n )
 | 
			
		||||
    utf8 encode
 | 
			
		||||
    [ length cuda-malloc ] keep
 | 
			
		||||
    [ host>device ] [ drop ] 2bi ;
 | 
			
		||||
 | 
			
		||||
ERROR: bad-cuda-parameter parameter ;
 | 
			
		||||
 | 
			
		||||
:: set-parameters ( seq -- )
 | 
			
		||||
    cuda-function get :> function
 | 
			
		||||
    0 :> offset!
 | 
			
		||||
    seq [
 | 
			
		||||
        [ offset ] dip
 | 
			
		||||
        {
 | 
			
		||||
            { [ dup cuda-memory? ] [ ptr>> cuda-int ] }
 | 
			
		||||
            { [ dup float? ] [ cuda-float ] }
 | 
			
		||||
            { [ dup integer? ] [ cuda-int ] }
 | 
			
		||||
            [ bad-cuda-parameter ]
 | 
			
		||||
        } cond
 | 
			
		||||
        offset 4 + offset!
 | 
			
		||||
    ] each
 | 
			
		||||
    offset param-size ;
 | 
			
		||||
 | 
			
		||||
: cuda-device-attribute ( attribute dev -- n )
 | 
			
		||||
    [ a:int <c-object> ] 2dip
 | 
			
		||||
    [ cuDeviceGetAttribute cuda-error ]
 | 
			
		||||
    [ 2drop a:*int ] 3bi ;
 | 
			
		||||
 | 
			
		||||
: function-block-shape* ( function x y z -- )
 | 
			
		||||
    cuFuncSetBlockShape cuda-error ;
 | 
			
		||||
 | 
			
		||||
: function-block-shape ( x y z -- )
 | 
			
		||||
    [ cuda-function get ] 3dip
 | 
			
		||||
    cuFuncSetBlockShape cuda-error ;
 | 
			
		||||
 | 
			
		||||
: function-shared-size* ( function n -- )
 | 
			
		||||
    cuFuncSetSharedSize cuda-error ;
 | 
			
		||||
 | 
			
		||||
: function-shared-size ( n -- )
 | 
			
		||||
    [ cuda-function get ] dip
 | 
			
		||||
    cuFuncSetSharedSize cuda-error ;
 | 
			
		||||
 | 
			
		||||
: launch ( -- )
 | 
			
		||||
    cuda-launcher get {
 | 
			
		||||
        [ block-shape>> first3 function-block-shape ]
 | 
			
		||||
        [ shared-size>> function-shared-size ]
 | 
			
		||||
        [
 | 
			
		||||
            grid>> [
 | 
			
		||||
                launch-function
 | 
			
		||||
            ] [
 | 
			
		||||
                first2 launch-function-grid
 | 
			
		||||
            ] if-empty
 | 
			
		||||
        ]
 | 
			
		||||
    } cleave ;
 | 
			
		||||
 | 
			
		||||
: cuda-device. ( n -- )
 | 
			
		||||
    {
 | 
			
		||||
        [ "Device: " write number>string print ]
 | 
			
		||||
        [ "Name: " write cuda-device-name print ]
 | 
			
		||||
        [ "Memory: " write cuda-device-memory number>string print ]
 | 
			
		||||
        [
 | 
			
		||||
            "Capability: " write
 | 
			
		||||
            cuda-device-capability [ number>string ] map " " join print
 | 
			
		||||
        ]
 | 
			
		||||
        [ "Properties: " write cuda-device-properties . ]
 | 
			
		||||
        [
 | 
			
		||||
            "CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: " write
 | 
			
		||||
            CU_DEVICE_ATTRIBUTE_GPU_OVERLAP swap
 | 
			
		||||
            cuda-device-attribute number>string print
 | 
			
		||||
        ]
 | 
			
		||||
    } cleave ;
 | 
			
		||||
 | 
			
		||||
: cuda. ( -- )
 | 
			
		||||
    "CUDA Version: " write cuda-version number>string print nl
 | 
			
		||||
    #cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
 | 
			
		||||
 | 
			
		||||
: c-type>cuda-setter ( c-type -- n cuda-type )
 | 
			
		||||
    {
 | 
			
		||||
        { [ dup a:int = ] [ drop 4 [ cuda-int* ] ] }
 | 
			
		||||
| 
						 | 
				
			
			@ -353,13 +71,13 @@ MACRO: cuda-arguments ( c-types -- quot: ( args... function -- ) )
 | 
			
		|||
    swap '[ _ param-size* ] suffix
 | 
			
		||||
    '[ _ cleave ] ;
 | 
			
		||||
 | 
			
		||||
: define-cuda-word ( word string arguments -- )
 | 
			
		||||
: define-cuda-word ( word module-name function-name arguments -- )
 | 
			
		||||
    [
 | 
			
		||||
        '[
 | 
			
		||||
            _ get-function-ptr
 | 
			
		||||
            _ _ cached-function
 | 
			
		||||
            [ nip _ cuda-arguments ]
 | 
			
		||||
            [ run-function-launcher ] 2bi
 | 
			
		||||
        ]
 | 
			
		||||
    ]
 | 
			
		||||
    [ nip \ function-launcher suffix a:void function-effect ]
 | 
			
		||||
    2bi define-declared ;
 | 
			
		||||
    [ 2nip \ function-launcher suffix a:void function-effect ]
 | 
			
		||||
    3bi define-declared ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,7 @@
 | 
			
		|||
! Copyright (C) 2010 Doug Coleman.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: alien.c-types alien.strings cuda cuda.syntax destructors
 | 
			
		||||
io.encodings.utf8 kernel locals math prettyprint sequences ;
 | 
			
		||||
USING: alien.c-types alien.strings cuda cuda.memory cuda.syntax
 | 
			
		||||
destructors io io.encodings.utf8 kernel locals math sequences ;
 | 
			
		||||
IN: cuda.demos.hello-world
 | 
			
		||||
 | 
			
		||||
CUDA-LIBRARY: hello vocab:cuda/demos/hello-world/hello.ptx
 | 
			
		||||
| 
						 | 
				
			
			@ -9,22 +9,13 @@ CUDA-LIBRARY: hello vocab:cuda/demos/hello-world/hello.ptx
 | 
			
		|||
CUDA-FUNCTION: helloWorld ( char* string-ptr ) ;
 | 
			
		||||
 | 
			
		||||
:: cuda-hello-world ( -- )
 | 
			
		||||
    T{ launcher
 | 
			
		||||
        { device 0 }
 | 
			
		||||
        { path "vocab:cuda/demos/hello-world/hello.ptx" }
 | 
			
		||||
    } [
 | 
			
		||||
        "Hello World!" [ - ] map-index malloc-device-string &dispose dup :> str
 | 
			
		||||
    T{ launcher { device 0 } } [
 | 
			
		||||
        "Hello World!" [ - ] map-index malloc-device-string
 | 
			
		||||
        &dispose dup :> str
 | 
			
		||||
 | 
			
		||||
        T{ function-launcher
 | 
			
		||||
            { dim-block { 6 1 1 } }
 | 
			
		||||
            { dim-grid { 2 1 } }
 | 
			
		||||
            { shared-size 0 }
 | 
			
		||||
        }
 | 
			
		||||
        helloWorld
 | 
			
		||||
        { 6 1 1 } { 2 1 } 1 3<<< helloWorld
 | 
			
		||||
 | 
			
		||||
        ! <<< { 6 1 1 } { 2 1 } 1 >>> helloWorld
 | 
			
		||||
 | 
			
		||||
        str device>host utf8 alien>string .
 | 
			
		||||
        str device>host utf8 alien>string print
 | 
			
		||||
    ] with-cuda ;
 | 
			
		||||
 | 
			
		||||
MAIN: cuda-hello-world
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,14 +8,9 @@ CUDA-LIBRARY: prefix-sum vocab:cuda/demos/prefix-sum/prefix-sum.ptx
 | 
			
		|||
CUDA-FUNCTION: prefix_sum_block ( uint* in, uint* out, uint n ) ;
 | 
			
		||||
 | 
			
		||||
:: cuda-prefix-sum ( -- )
 | 
			
		||||
    T{ launcher
 | 
			
		||||
        { device 0 }
 | 
			
		||||
        { path "vocab:cuda/demos/prefix-sum/prefix-sum.ptx" }
 | 
			
		||||
    } [
 | 
			
		||||
 | 
			
		||||
        
 | 
			
		||||
    T{ launcher { device 0 } }
 | 
			
		||||
    [
 | 
			
		||||
        ! { 1 1 1 } { 2 1 } 0 3<<< prefix_sum_block
 | 
			
		||||
 | 
			
		||||
    ] with-cuda ;
 | 
			
		||||
 | 
			
		||||
MAIN: cuda-prefix-sum
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
Doug Coleman
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,74 @@
 | 
			
		|||
! Copyright (C) 2010 Doug Coleman.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: accessors alien alien.data assocs byte-arrays cuda.ffi
 | 
			
		||||
cuda.utils destructors io.encodings.string io.encodings.utf8
 | 
			
		||||
kernel locals namespaces sequences ;
 | 
			
		||||
QUALIFIED-WITH: alien.c-types a
 | 
			
		||||
IN: cuda.memory
 | 
			
		||||
 | 
			
		||||
SYMBOL: cuda-memory-hashtable
 | 
			
		||||
 | 
			
		||||
TUPLE: cuda-memory < disposable ptr length ;
 | 
			
		||||
 | 
			
		||||
: <cuda-memory> ( ptr length -- obj )
 | 
			
		||||
    cuda-memory new-disposable
 | 
			
		||||
        swap >>length
 | 
			
		||||
        swap >>ptr ;
 | 
			
		||||
 | 
			
		||||
: add-cuda-memory ( obj -- obj )
 | 
			
		||||
    dup dup ptr>> cuda-memory-hashtable get set-at ;
 | 
			
		||||
 | 
			
		||||
: delete-cuda-memory ( obj -- )
 | 
			
		||||
    cuda-memory-hashtable delete-at ;
 | 
			
		||||
 | 
			
		||||
ERROR: invalid-cuda-memory ptr ;
 | 
			
		||||
 | 
			
		||||
: cuda-memory-length ( cuda-memory -- n )
 | 
			
		||||
    ptr>> cuda-memory-hashtable get ?at [
 | 
			
		||||
        length>>
 | 
			
		||||
    ] [
 | 
			
		||||
        invalid-cuda-memory
 | 
			
		||||
    ] if ;
 | 
			
		||||
 | 
			
		||||
M: cuda-memory byte-length length>> ;
 | 
			
		||||
 | 
			
		||||
: cuda-malloc ( n -- ptr )
 | 
			
		||||
    [ CUdeviceptr <c-object> ] dip
 | 
			
		||||
    [ cuMemAlloc cuda-error ] 2keep
 | 
			
		||||
    [ a:*int ] dip <cuda-memory> add-cuda-memory ;
 | 
			
		||||
 | 
			
		||||
: cuda-free* ( ptr -- )
 | 
			
		||||
    cuMemFree cuda-error ;
 | 
			
		||||
 | 
			
		||||
M: cuda-memory dispose ( ptr -- )
 | 
			
		||||
    ptr>> cuda-free* ;
 | 
			
		||||
 | 
			
		||||
: memcpy-device>device ( dest-ptr src-ptr count -- )
 | 
			
		||||
    cuMemcpyDtoD cuda-error ;
 | 
			
		||||
 | 
			
		||||
: memcpy-device>array ( dest-array dest-index src-ptr count -- )
 | 
			
		||||
    cuMemcpyDtoA cuda-error ;
 | 
			
		||||
 | 
			
		||||
: memcpy-array>device ( dest-ptr src-array src-index count -- )
 | 
			
		||||
    cuMemcpyAtoD cuda-error ;
 | 
			
		||||
 | 
			
		||||
: memcpy-array>host ( dest-ptr src-array src-index count -- )
 | 
			
		||||
    cuMemcpyAtoH cuda-error ;
 | 
			
		||||
 | 
			
		||||
: memcpy-host>array ( dest-array dest-index src-ptr count -- )
 | 
			
		||||
    cuMemcpyHtoA cuda-error ;
 | 
			
		||||
 | 
			
		||||
: memcpy-array>array ( dest-array dest-index src-array src-ptr count -- )
 | 
			
		||||
    cuMemcpyAtoA cuda-error ;
 | 
			
		||||
 | 
			
		||||
: host>device ( dest-ptr src-ptr -- )
 | 
			
		||||
    [ ptr>> ] dip dup length cuMemcpyHtoD cuda-error ;
 | 
			
		||||
 | 
			
		||||
:: device>host ( ptr -- seq )
 | 
			
		||||
    ptr byte-length <byte-array>
 | 
			
		||||
    [ ptr [ ptr>> ] [ byte-length ] bi cuMemcpyDtoH cuda-error ] keep ;
 | 
			
		||||
 | 
			
		||||
: malloc-device-string ( string -- n )
 | 
			
		||||
    utf8 encode
 | 
			
		||||
    [ length cuda-malloc ] keep
 | 
			
		||||
    [ host>device ] [ drop ] 2bi ;
 | 
			
		||||
| 
						 | 
				
			
			@ -1,12 +1,17 @@
 | 
			
		|||
! Copyright (C) 2010 Doug Coleman.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: alien.parser cuda kernel lexer parser ;
 | 
			
		||||
USING: alien.parser cuda cuda.utils io.backend kernel lexer
 | 
			
		||||
namespaces parser ;
 | 
			
		||||
IN: cuda.syntax
 | 
			
		||||
 | 
			
		||||
SYNTAX: CUDA-LIBRARY: scan scan add-cuda-library ;
 | 
			
		||||
SYNTAX: CUDA-LIBRARY:
 | 
			
		||||
    scan scan normalize-path
 | 
			
		||||
    [ add-cuda-library ]
 | 
			
		||||
    [ drop current-cuda-library set-global ] 2bi ;
 | 
			
		||||
 | 
			
		||||
SYNTAX: CUDA-FUNCTION:
 | 
			
		||||
    scan [ create-in ] [ ] bi ";" scan-c-args drop define-cuda-word ;
 | 
			
		||||
    scan [ create-in current-cuda-library get ] [ ] bi
 | 
			
		||||
    ";" scan-c-args drop define-cuda-word ;
 | 
			
		||||
 | 
			
		||||
: 3<<< ( dim-block dim-grid shared-size -- function-launcher )
 | 
			
		||||
    f function-launcher boa ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
Doug Coleman
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,204 @@
 | 
			
		|||
! Copyright (C) 2010 Doug Coleman.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: accessors alien.data alien.strings arrays assocs
 | 
			
		||||
byte-arrays classes.struct combinators cuda.ffi io io.backend
 | 
			
		||||
io.encodings.utf8 kernel math.parser namespaces prettyprint
 | 
			
		||||
sequences ;
 | 
			
		||||
QUALIFIED-WITH: alien.c-types a
 | 
			
		||||
IN: cuda.utils
 | 
			
		||||
 | 
			
		||||
SYMBOL: cuda-device
 | 
			
		||||
SYMBOL: cuda-context
 | 
			
		||||
SYMBOL: cuda-module
 | 
			
		||||
SYMBOL: cuda-function
 | 
			
		||||
SYMBOL: cuda-launcher
 | 
			
		||||
 | 
			
		||||
SYMBOL: cuda-modules
 | 
			
		||||
SYMBOL: cuda-functions
 | 
			
		||||
 | 
			
		||||
ERROR: throw-cuda-error n ;
 | 
			
		||||
 | 
			
		||||
: cuda-error ( n -- )
 | 
			
		||||
    dup CUDA_SUCCESS = [ drop ] [ throw-cuda-error ] if ;
 | 
			
		||||
 | 
			
		||||
: init-cuda ( -- )
 | 
			
		||||
    0 cuInit cuda-error ;
 | 
			
		||||
 | 
			
		||||
: cuda-version ( -- n )
 | 
			
		||||
    a:int <c-object> [ cuDriverGetVersion cuda-error ] keep a:*int ;
 | 
			
		||||
 | 
			
		||||
: #cuda-devices ( -- n )
 | 
			
		||||
    a:int <c-object> [ cuDeviceGetCount cuda-error ] keep a:*int ;
 | 
			
		||||
 | 
			
		||||
: n>cuda-device ( n -- device )
 | 
			
		||||
    [ CUdevice <c-object> ] dip [ cuDeviceGet cuda-error ] 2keep drop a:*int ;
 | 
			
		||||
 | 
			
		||||
: enumerate-cuda-devices ( -- devices )
 | 
			
		||||
    #cuda-devices iota [ n>cuda-device ] map ;
 | 
			
		||||
 | 
			
		||||
: cuda-device-properties ( device -- properties )
 | 
			
		||||
    [ CUdevprop <c-object> ] dip
 | 
			
		||||
    [ cuDeviceGetProperties cuda-error ] 2keep drop
 | 
			
		||||
    CUdevprop memory>struct ;
 | 
			
		||||
 | 
			
		||||
: cuda-devices ( -- assoc )
 | 
			
		||||
    enumerate-cuda-devices [ dup cuda-device-properties ] { } map>assoc ;
 | 
			
		||||
 | 
			
		||||
: cuda-device-name ( n -- string )
 | 
			
		||||
    [ 256 [ <byte-array> ] keep ] dip
 | 
			
		||||
    [ cuDeviceGetName cuda-error ]
 | 
			
		||||
    [ 2drop utf8 alien>string ] 3bi ;
 | 
			
		||||
 | 
			
		||||
: cuda-device-capability ( n -- pair )
 | 
			
		||||
    [ a:int <c-object> a:int <c-object> ] dip
 | 
			
		||||
    [ cuDeviceComputeCapability cuda-error ]
 | 
			
		||||
    [ drop [ a:*int ] bi@ ] 3bi 2array ;
 | 
			
		||||
 | 
			
		||||
: cuda-device-memory ( n -- bytes )
 | 
			
		||||
    [ a:uint <c-object> ] dip
 | 
			
		||||
    [ cuDeviceTotalMem cuda-error ]
 | 
			
		||||
    [ drop a:*uint ] 2bi ;
 | 
			
		||||
 | 
			
		||||
: get-function-ptr* ( module string -- function )
 | 
			
		||||
    [ CUfunction <c-object> ] 2dip
 | 
			
		||||
    [ cuModuleGetFunction cuda-error ] 3keep 2drop a:*void* ;
 | 
			
		||||
 | 
			
		||||
: get-function-ptr ( string -- function )
 | 
			
		||||
    [ cuda-module get ] dip get-function-ptr* ;
 | 
			
		||||
 | 
			
		||||
: with-cuda-function ( string quot -- )
 | 
			
		||||
    [
 | 
			
		||||
        get-function-ptr* cuda-function set
 | 
			
		||||
    ] dip call ; inline
 | 
			
		||||
 | 
			
		||||
: create-context ( flags device -- context )
 | 
			
		||||
    [ CUcontext <c-object> ] 2dip
 | 
			
		||||
    [ cuCtxCreate cuda-error ] 3keep 2drop a:*void* ;
 | 
			
		||||
 | 
			
		||||
: destroy-context ( context -- ) cuCtxDestroy cuda-error ;
 | 
			
		||||
 | 
			
		||||
SYMBOL: cuda-libraries
 | 
			
		||||
cuda-libraries [ H{ } clone ] initialize
 | 
			
		||||
 | 
			
		||||
SYMBOL: current-cuda-library
 | 
			
		||||
 | 
			
		||||
TUPLE: cuda-library name path handle ;
 | 
			
		||||
 | 
			
		||||
: <cuda-library> ( name path -- obj )
 | 
			
		||||
    \ cuda-library new
 | 
			
		||||
        swap >>path
 | 
			
		||||
        swap >>name ;
 | 
			
		||||
 | 
			
		||||
: add-cuda-library ( name path -- )
 | 
			
		||||
    normalize-path <cuda-library>
 | 
			
		||||
    dup name>> cuda-libraries get-global set-at ;
 | 
			
		||||
 | 
			
		||||
: ?delete-at ( key assoc -- old/key ? )
 | 
			
		||||
    2dup delete-at* [ 2nip t ] [ 2drop f ] if ; inline
 | 
			
		||||
 | 
			
		||||
ERROR: no-cuda-library name ;
 | 
			
		||||
 | 
			
		||||
: load-module ( path -- module )
 | 
			
		||||
    [ CUmodule <c-object> ] dip
 | 
			
		||||
    [ cuModuleLoad cuda-error ] 2keep drop a:*void* ;
 | 
			
		||||
 | 
			
		||||
: unload-module ( module -- )
 | 
			
		||||
    cuModuleUnload cuda-error ;
 | 
			
		||||
 | 
			
		||||
: load-cuda-library ( library -- handle )
 | 
			
		||||
    path>> load-module ;
 | 
			
		||||
 | 
			
		||||
: lookup-cuda-library ( name -- cuda-library )
 | 
			
		||||
    cuda-libraries get ?at [ no-cuda-library ] unless ;
 | 
			
		||||
 | 
			
		||||
: remove-cuda-library ( name -- library )
 | 
			
		||||
    cuda-libraries get ?delete-at [ no-cuda-library ] unless ;
 | 
			
		||||
 | 
			
		||||
: unload-cuda-library ( name -- )
 | 
			
		||||
    remove-cuda-library handle>> unload-module ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
: cached-module ( module-name -- alien )
 | 
			
		||||
    lookup-cuda-library
 | 
			
		||||
    cuda-modules get-global [ load-cuda-library ] cache ;
 | 
			
		||||
 | 
			
		||||
: cached-function ( module-name function-name -- alien )
 | 
			
		||||
    [ cached-module ] dip
 | 
			
		||||
    2array cuda-functions get [ first2 get-function-ptr* ] cache ;
 | 
			
		||||
 | 
			
		||||
: launch-function* ( function -- ) cuLaunch cuda-error ;
 | 
			
		||||
 | 
			
		||||
: launch-function ( -- ) cuda-function get cuLaunch cuda-error ;
 | 
			
		||||
 | 
			
		||||
: cuda-int* ( function offset value -- )
 | 
			
		||||
    cuParamSeti cuda-error ;
 | 
			
		||||
 | 
			
		||||
: cuda-int ( offset value -- )
 | 
			
		||||
    [ cuda-function get ] 2dip cuda-int* ;
 | 
			
		||||
 | 
			
		||||
: cuda-float* ( function offset value -- )
 | 
			
		||||
    cuParamSetf cuda-error ;
 | 
			
		||||
 | 
			
		||||
: cuda-float ( offset value -- )
 | 
			
		||||
    [ cuda-function get ] 2dip cuda-float* ;
 | 
			
		||||
 | 
			
		||||
: cuda-vector* ( function offset ptr n -- )
 | 
			
		||||
    cuParamSetv cuda-error ;
 | 
			
		||||
 | 
			
		||||
: cuda-vector ( offset ptr n -- )
 | 
			
		||||
    [ cuda-function get ] 3dip cuda-vector* ;
 | 
			
		||||
 | 
			
		||||
: param-size* ( function n -- )
 | 
			
		||||
    cuParamSetSize cuda-error ;
 | 
			
		||||
 | 
			
		||||
: param-size ( n -- )
 | 
			
		||||
    [ cuda-function get ] dip param-size* ;
 | 
			
		||||
 | 
			
		||||
: launch-function-grid* ( function width height -- )
 | 
			
		||||
    cuLaunchGrid cuda-error ;
 | 
			
		||||
 | 
			
		||||
: launch-function-grid ( width height -- )
 | 
			
		||||
    [ cuda-function get ] 2dip
 | 
			
		||||
    cuLaunchGrid cuda-error ;
 | 
			
		||||
 | 
			
		||||
ERROR: bad-cuda-parameter parameter ;
 | 
			
		||||
 | 
			
		||||
: cuda-device-attribute ( attribute dev -- n )
 | 
			
		||||
    [ a:int <c-object> ] 2dip
 | 
			
		||||
    [ cuDeviceGetAttribute cuda-error ]
 | 
			
		||||
    [ 2drop a:*int ] 3bi ;
 | 
			
		||||
 | 
			
		||||
: function-block-shape* ( function x y z -- )
 | 
			
		||||
    cuFuncSetBlockShape cuda-error ;
 | 
			
		||||
 | 
			
		||||
: function-block-shape ( x y z -- )
 | 
			
		||||
    [ cuda-function get ] 3dip
 | 
			
		||||
    cuFuncSetBlockShape cuda-error ;
 | 
			
		||||
 | 
			
		||||
: function-shared-size* ( function n -- )
 | 
			
		||||
    cuFuncSetSharedSize cuda-error ;
 | 
			
		||||
 | 
			
		||||
: function-shared-size ( n -- )
 | 
			
		||||
    [ cuda-function get ] dip
 | 
			
		||||
    cuFuncSetSharedSize cuda-error ;
 | 
			
		||||
 | 
			
		||||
: cuda-device. ( n -- )
 | 
			
		||||
    {
 | 
			
		||||
        [ "Device: " write number>string print ]
 | 
			
		||||
        [ "Name: " write cuda-device-name print ]
 | 
			
		||||
        [ "Memory: " write cuda-device-memory number>string print ]
 | 
			
		||||
        [
 | 
			
		||||
            "Capability: " write
 | 
			
		||||
            cuda-device-capability [ number>string ] map " " join print
 | 
			
		||||
        ]
 | 
			
		||||
        [ "Properties: " write cuda-device-properties . ]
 | 
			
		||||
        [
 | 
			
		||||
            "CU_DEVICE_ATTRIBUTE_GPU_OVERLAP: " write
 | 
			
		||||
            CU_DEVICE_ATTRIBUTE_GPU_OVERLAP swap
 | 
			
		||||
            cuda-device-attribute number>string print
 | 
			
		||||
        ]
 | 
			
		||||
    } cleave ;
 | 
			
		||||
 | 
			
		||||
: cuda. ( -- )
 | 
			
		||||
    "CUDA Version: " write cuda-version number>string print nl
 | 
			
		||||
    #cuda-devices iota [ nl ] [ cuda-device. ] interleave ;
 | 
			
		||||
		Loading…
	
		Reference in New Issue