From 2ad382865e6ef3717e0ae7648c18d6ef8fbdaa23 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 20 May 2010 17:56:20 -0700 Subject: [PATCH] cuda.libraries, cuda.syntax: support for both 32- and 64-bit CUDA pointer abis. make CUDA-LIBRARY: read the abi as part of the library definition --- .../cuda/demos/hello-world/hello-world.factor | 2 +- extra/cuda/demos/prefix-sum/prefix-sum.factor | 4 +- extra/cuda/ffi/ffi.factor | 9 ---- extra/cuda/libraries/libraries.factor | 53 +++++++++++++------ extra/cuda/syntax/syntax.factor | 8 +-- 5 files changed, 45 insertions(+), 31 deletions(-) diff --git a/extra/cuda/demos/hello-world/hello-world.factor b/extra/cuda/demos/hello-world/hello-world.factor index dae6ce83d6..8a7adb7b4d 100644 --- a/extra/cuda/demos/hello-world/hello-world.factor +++ b/extra/cuda/demos/hello-world/hello-world.factor @@ -6,7 +6,7 @@ destructors io io.encodings.string io.encodings.utf8 kernel locals math math.parser namespaces sequences strings ; IN: cuda.demos.hello-world -CUDA-LIBRARY: hello vocab:cuda/demos/hello-world/hello.ptx +CUDA-LIBRARY: hello cuda32 vocab:cuda/demos/hello-world/hello.ptx CUDA-FUNCTION: helloWorld ( char* string-ptr ) ; diff --git a/extra/cuda/demos/prefix-sum/prefix-sum.factor b/extra/cuda/demos/prefix-sum/prefix-sum.factor index aedc9aa095..d217f61c60 100644 --- a/extra/cuda/demos/prefix-sum/prefix-sum.factor +++ b/extra/cuda/demos/prefix-sum/prefix-sum.factor @@ -1,9 +1,9 @@ ! Copyright (C) 2010 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. 
-USING: alien.c-types cuda cuda.contexts cuda.syntax locals ; +USING: alien.c-types cuda cuda.contexts cuda.libraries cuda.syntax locals ; IN: cuda.demos.prefix-sum -CUDA-LIBRARY: prefix-sum vocab:cuda/demos/prefix-sum/prefix-sum.ptx +CUDA-LIBRARY: prefix-sum cuda32 vocab:cuda/demos/prefix-sum/prefix-sum.ptx CUDA-FUNCTION: prefix_sum_block ( uint* in, uint* out, uint n ) ; diff --git a/extra/cuda/ffi/ffi.factor b/extra/cuda/ffi/ffi.factor index bcbb1ff60a..c0537bea8d 100644 --- a/extra/cuda/ffi/ffi.factor +++ b/extra/cuda/ffi/ffi.factor @@ -28,15 +28,6 @@ TYPEDEF: void* CUgraphicsResource SYMBOLS: CUdouble CUlonglong CUulonglong ; -: >cuda-param-type ( c-type -- c-type' ) - { - { CUdeviceptr [ void* ] } - { double [ CUdouble ] } - { longlong [ CUlonglong ] } - { ulonglong [ CUulonglong ] } - [ ] - } case ; - << : always-8-byte-align ( c-type -- c-type ) 8 >>align 8 >>align-first ; diff --git a/extra/cuda/libraries/libraries.factor b/extra/cuda/libraries/libraries.factor index b4a3e35e9f..157a4e2fca 100644 --- a/extra/cuda/libraries/libraries.factor +++ b/extra/cuda/libraries/libraries.factor @@ -3,11 +3,14 @@ USING: accessors alien.data alien.parser arrays assocs byte-arrays classes.struct combinators combinators.short-circuit cuda cuda.ffi fry generalizations io.backend kernel macros math -namespaces sequences words ; +namespaces sequences variants words ; FROM: classes.struct.private => compute-struct-offsets write-struct-slot ; QUALIFIED-WITH: alien.c-types c IN: cuda.libraries +VARIANT: cuda-abi + cuda32 cuda64 ; + SYMBOL: cuda-modules SYMBOL: cuda-functions @@ -95,14 +98,29 @@ ERROR: no-cuda-library name ; : fill-param-buffer ( values... buffer quots... n -- ) [ cleave-curry ] [ spread* ] bi ; inline -: >argument-type ( c-type -- c-type' ) - dup { [ c:void* = ] [ c:pointer? ] } 1|| [ drop CUdeviceptr ] when ; +: pointer-argument-type? ( c-type -- ? ) + { [ c:void* = ] [ CUdeviceptr = ] [ c:pointer? 
] } 1|| ; -: >argument-struct-slot ( type -- slot ) - "cuda-arg" swap >argument-type { } <struct-slot-spec> ; +: abi-pointer-type ( abi -- type ) + { + { cuda32 [ c:uint ] } + { cuda64 [ CUulonglong ] } + } case ; -: [cuda-arguments] ( c-types -- quot ) - [ >argument-struct-slot ] map +: >argument-type ( c-type abi -- c-type' ) + swap { + { [ dup pointer-argument-type? ] [ drop abi-pointer-type ] } + { [ dup c:double = ] [ 2drop CUdouble ] } + { [ dup c:longlong = ] [ 2drop CUlonglong ] } + { [ dup c:ulonglong = ] [ 2drop CUulonglong ] } + [ nip ] + } cond ; + +: >argument-struct-slot ( c-type abi -- slot ) + >argument-type "cuda-arg" swap { } <struct-slot-spec> ; + +: [cuda-arguments] ( c-types abi -- quot ) + '[ _ >argument-struct-slot ] map [ compute-struct-offsets ] [ [ '[ _ write-struct-slot ] ] [ ] map-as ] [ length ] tri @@ -112,8 +130,8 @@ ERROR: no-cuda-library name ; ] ; PRIVATE> -MACRO: cuda-arguments ( c-types -- quot: ( args... function -- ) ) - [ [ 0 cuda-param-size ] ] [ [cuda-arguments] ] if-empty ; +MACRO: cuda-arguments ( c-types abi -- quot: ( args... function -- ) ) + [ [ 0 cuda-param-size ] ] swap '[ _ [cuda-arguments] ] if-empty ; : get-function-ptr ( module string -- function ) [ CUfunction ] 2dip @@ -128,9 +146,9 @@ MACRO: cuda-arguments ( c-types -- quot: ( args... function -- ) ) 2array cuda-functions get [ first2 get-function-ptr ] cache ; MACRO: cuda-invoke ( module-name function-name arguments -- ) - '[ + pick lookup-cuda-library abi>> '[ _ _ cached-function - [ nip _ cuda-arguments ] + [ nip _ _ cuda-arguments ] [ run-grid ] 2bi ] ; @@ -150,14 +168,19 @@ MACRO: cuda-invoke ( module-name function-name arguments -- ) : define-cuda-global ( word module-name symbol-name -- ) '[ _ _ cuda-global ] (( -- device-ptr )) define-declared ; -TUPLE: cuda-library name path handle ; +TUPLE: cuda-library name abi path handle ; +ERROR: bad-cuda-abi abi ; -: <cuda-library> ( name path -- obj ) +: check-cuda-abi ( abi -- abi ) + dup cuda-abi? 
[ bad-cuda-abi ] unless ; inline + +: <cuda-library> ( name abi path -- obj ) \ cuda-library new swap >>path - swap >>name ; + swap check-cuda-abi >>abi + swap >>name ; inline -: add-cuda-library ( name path -- ) +: add-cuda-library ( name abi path -- ) normalize-path <cuda-library> dup name>> cuda-libraries get-global set-at ; diff --git a/extra/cuda/syntax/syntax.factor b/extra/cuda/syntax/syntax.factor index b09fff4739..09b7786cf9 100644 --- a/extra/cuda/syntax/syntax.factor +++ b/extra/cuda/syntax/syntax.factor @@ -1,13 +1,13 @@ ! Copyright (C) 2010 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: alien.parser cuda cuda.libraries io.backend -kernel lexer namespaces parser ; +fry kernel lexer namespaces parser ; IN: cuda.syntax SYNTAX: CUDA-LIBRARY: - scan scan normalize-path - [ add-cuda-library ] - [ drop current-cuda-library set-global ] 2bi ; + scan scan-word scan + '[ _ _ add-cuda-library ] + [ current-cuda-library set-global ] bi ; SYNTAX: CUDA-FUNCTION: scan [ create-in current-cuda-library get ] keep