66 lines
1.3 KiB
Plaintext
66 lines
1.3 KiB
Plaintext
/*
|
|
World using CUDA
|
|
**
|
|
** The string "Hello World!" is mangled then restored using a common CUDA idiom
|
|
**
|
|
** Byron Galbraith
|
|
** 2009-02-18
|
|
*/
|
|
#include <cuda.h>
|
|
#include <stdio.h>
|
|
|
|
// Prototypes
|
|
extern "C" __global__ void helloWorld(char*);
|
|
|
|
// Host function
|
|
int
|
|
main(int argc, char** argv)
|
|
{
|
|
int i;
|
|
|
|
// desired output
|
|
char str[] = "Hello World!";
|
|
|
|
// mangle contents of output
|
|
// the null character is left intact for simplicity
|
|
for(i = 0; i < 12; i++)
|
|
str[i] -= i;
|
|
|
|
// allocate memory on the device
|
|
char *d_str;
|
|
size_t size = sizeof(str);
|
|
cudaMalloc((void**)&d_str, size);
|
|
|
|
// copy the string to the device
|
|
cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice);
|
|
|
|
// set the grid and block sizes
|
|
dim3 dimGrid(2); // one block per word
|
|
dim3 dimBlock(6); // one thread per character
|
|
|
|
// invoke the kernel
|
|
helloWorld<<< dimGrid, dimBlock >>>(d_str);
|
|
|
|
// retrieve the results from the device
|
|
cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost);
|
|
|
|
// free up the allocated memory on the device
|
|
cudaFree(d_str);
|
|
|
|
// everyone's favorite part
|
|
printf("%s\n", str);
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Device kernel
|
|
__global__ void
|
|
helloWorld(char* str)
|
|
{
|
|
// determine where in the thread grid we are
|
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
|
// unmangle output
|
|
str[idx] += idx;
|
|
}
|