CUDA - typical program structure
Last updated
__global__ void cudaKernel(float *a, float *b, float *c)
{
int tID = some_mean_to_identify_thread_index;
c[tID] = a[tID] + b[tID];
}
/*
 * Host-side skeleton of a typical CUDA program. Most steps are placeholders
 * written as comments; only the kernel launch and the return statement are
 * actual code. argc and argv are accepted but not used in the visible code.
 * Returns 0 unconditionally.
 */
int main(int argc, char* argv[]) {
/* Step 1: allocate and initialize vectors a, b and c on both the CPU (host)
   and the GPU (device).
   NOTE(review): the declarations of a, b and c are not shown; the pointers
   passed to the launch below are presumably the device-side copies -- confirm. */
/* Step 2: data transfer copying the input vectors a and b from the CPU to the GPU. */
/* Step 3: launch the kernel. some_environment_info is a placeholder for the
   launch configuration given between <<< and >>> (e.g. number of blocks and
   threads per block). */
cudaKernel<<<some_environment_info>>>(a, b, c);
/* NOTE(review): nothing checks the launch for errors here; real code would
   normally call cudaGetLastError() after the launch -- add when fleshing
   out this skeleton. */
/* Step 4: data transfer copying the output vector c from the GPU back to the CPU. */
/* Step 5: free the memory allocated in step 1. */
return 0;
}