Code Snippets Cuda

Short texts Cuda 🧑‍💻 Code snippets Cuda ✍️ Reading lists Cuda 👀

CUDA Stencil_1D (TASK_08)

// UCSC CMPE220 Advanced Parallel Processing // Prof. Heiner Leitz // Author: Marcelo Siero. // Modified from code by: Andreas Goetz ([email protected]) // CUDA program to perform 1D stencil operation in parallel on the GPU // // /* FIXME */ COMMENTS THAT REQUIRE ATTENTION #include #include #include #include #include #include // define vector length, stencil […]

cuda-kernel-gol3d.cu

__global__ void evolve_kernel(int *cell_arr, int *out_arr, int n) { int num_elem = n*n*n; int current_idx = blockIdx.x*blockDim.x+threadIdx.x; for (int idx=current_idx; idx

measuring bandwidth of some kernel with arbitrary struct inputs/outputs

#include #include #include #include “trove/ptr.h” class timer { typedef std::chrono::high_resolution_clock::time_point time_point; typedef std::chrono::duration duration_type; public: void start() { then = std::chrono::high_resolution_clock::now(); } void stop() { now = std::chrono::high_resolution_clock::now(); } double elapsed() { return std::chrono::duration_cast(now – then).count(); } private: time_point then, now; }; template < typename T > struct device_array{ device_array(int n) : n(n) { //std::cout

update_mat2.cu

#include #include #include #include #include #include #include “cuda_runtime.h” #include #include “device_launch_parameters.h” #include #include #include #include #include #include #define PI 3.14 #define real double #define size 100 #define cudaCheckError(code) \ { \ if ((code) != cudaSuccess) \ { \ fprintf(stderr, “Cuda failure %s:%d: ‘%s’ \n”, __FILE__, __LINE__, \ cudaGetErrorString(code)); \ } \ } void update_mat2(real **&mat, […]

Example_kernel_function.cu

// Kernel: prints a greeting from the device and stores *num1 + *num2 in *sum.
//
// Parameters:
//   num1, num2 - pointers to the two addends; each is dereferenced once.
//   sum        - pointer that receives the result.
// All three pointers are dereferenced in device code, so they must refer to
// memory the GPU can access (e.g. cudaMalloc / cudaMallocManaged allocations).
//
// The body does not use threadIdx/blockIdx: every launched thread prints the
// message and writes the same value to *sum.
__global__ void helloCuda(int *num1, int *num2, int *sum) {
    // The string literal needs plain ASCII double quotes; typographic quotes
    // are not valid string delimiters and stop the kernel from compiling.
    printf("Hello World from CUDA GPU!\n");
    *sum = *num1 + *num2;
}