/* hello-cpp.cu */ #include using namespace std; const int N = 16; const int CORES = 16; /* this is the GPU kernel function */ __global__ void hello(char* s) { /* blockIdx is a struct containing our block id if this this is a one-dimensional kernel, then x is the block id y and z are also available for 2 or 3 dimensional kernels */ /* capitalize the string by subtracting 32 from each lowercase letter */ if ((s[blockIdx.x] >= 'a') && (s[blockIdx.x] <= 'z')) { s[blockIdx.x] -= 32; } } /* the main function begins running on the CPU */ int main() { /* this is the string data - it is 'hello world', in lower-case */ char cpu_string[N] = "hello world!"; /* allocate space on the GPU for the string */ char* gpu_string; cudaMalloc((void**) &gpu_string, N * sizeof(char)); /* send the character array to the GPU */ cudaMemcpy(gpu_string, cpu_string, N * sizeof(char), cudaMemcpyHostToDevice); /* invoke the GPU to run the kernel in parallel we specify CORES cores which each run once */ hello<<>>(gpu_string); /* copy the string back from the GPU to the CPU */ cudaMemcpy(cpu_string, gpu_string, N * sizeof(char), cudaMemcpyDeviceToHost); /* free the memory we allocated on the GPU */ cudaFree(gpu_string); /* print the string we got back from the GPU */ cout << cpu_string << endl; return 0; }