62#ifdef XT_CUDA_NVTX_ENABLE
63#include <nvToolsExt.h>
/* Evaluates to the larger of a and b; yields a when the two compare equal.
 * This is a function-like macro: each argument appears in the comparison and
 * the selected one appears again in the result, so arguments with side
 * effects (e.g. i++) may be evaluated twice and must be avoided. */
68#define MAX(a,b) ((a) >= (b) ? (a) : (b))
/* Helper for STRINGIFY: converts its argument to a string literal exactly as
 * written (operand of the # operator is not macro-expanded). */
70#define STRINGIFY2(x) #x
/* Converts x to a string literal after macro expansion: the extra level of
 * indirection lets x be expanded before STRINGIFY2 applies the # operator. */
71#define STRINGIFY(x) STRINGIFY2(x)
75 void * ptr = dlsym(libcuda_handle, STRINGIFY(name)); \
77 *(void **)(&(func_ ## name)) = ptr; \
79 load_successful = false; \
81 stderr, "ERROR:failed to load routine \"%s\" " \
82 "from %s\n", STRINGIFY(name), lib); \
86#define CU_ERROR_CHECK(ret) \
89 if(err != CUDA_SUCCESS) \
91 char const * err_string; \
93 func_cuGetErrorString(err, &err_string)) \
94 err_string = "undefined error"; \
95 fprintf(stderr, "Cuda driver error %d %s:: %s\n", \
96 __LINE__, __func__, err_string); \
103 (
void* data, CUpointer_attribute attribute, CUdeviceptr ptr);
107 CUdeviceptr dstDevice, CUdeviceptr srcDevice,
size_t ByteCount);
109 CUdeviceptr dstDevice,
const void* srcHost,
size_t ByteCount);
111 void* dstHost, CUdeviceptr srcDevice,
size_t ByteCount);
120 "ERROR(xt_cuda_malloc): unsupported memory type",
140 "ERROR(xt_cuda_free): unsupported memory type",
153 void * dst,
void const * src,
size_t buffer_size,
156 if (src_memtype == dst_memtype) {
157 switch (src_memtype) {
161 "ERROR(xt_cuda_memcpy): unsupported memory type",
165 memcpy(dst, src, buffer_size);
170 (CUdeviceptr)dst, (CUdeviceptr)src, buffer_size));
174 switch (src_memtype) {
178 "ERROR(xt_cuda_memcpy): unsupported source memory type",
185 "ERROR(xt_cuda_memcpy): unsupported destination memory type",
194 "ERROR(xt_cuda_memcpy): unsupported destination memory type",
207 CUmemorytype memorytype;
210 &memorytype, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)ptr);
217 ((ret == CUDA_SUCCESS) &&
218 (memorytype == CU_MEMORYTYPE_DEVICE))?
222#ifndef XT_CUDA_NVTX_ENABLE
229 static int first_call = 1;
230 static bool load_successful =
false;
232 if (!first_call)
return load_successful;
235 static const char lib[] =
238#elif defined __APPLE__ && defined __MACH__
241#warning "unsupported system, but trying libcuda.so.1"
245 void *libcuda_handle = dlopen(lib, RTLD_NOW);
247 load_successful = libcuda_handle != NULL;
249 if (load_successful) {
252 DLSYM(cuGetErrorString);
253 DLSYM(cuPointerGetAttribute);
260 dlclose(libcuda_handle);
264 int print_warning = 1;
267 char const * cuda_warning_env =
268 getenv(
"XT_CUDA_WARN_ON_MISSING_LIBCUDA");
270 if (cuda_warning_env) {
271 if (!strcmp(cuda_warning_env,
"0")) print_warning = 0;
272 else if (!strcmp(cuda_warning_env,
"1")) print_warning = 1;
276 "invalid value of XT_CUDA_WARN_ON_MISSING_LIBCUDA "
277 "environment variable (has to be \"0\" or \"1\")",
285 "-----------------------------------------------------------------------\n"
286 "WARNING: yaxt was compiled with CUDA-support, but the library could not\n"
287 " be loaded. CUDA-support will be deactivated. Try setting\n"
288 " LD_LIBRARY_PATH to the location of libcuda.so.1 or set RPATH\n"
290 " To suppress this message set the\n"
291 " XT_CUDA_WARN_ON_MISSING_LIBCUDA environment variable to \"0\"\n"
292 "-----------------------------------------------------------------------\n",
297 return load_successful;
305#ifdef XT_CUDA_NVTX_ENABLE
306 .Instr_push = nvtxRangePush,
307 .Instr_pop = nvtxRangePop,
add versions of standard API functions that do not return on error
void *(* Malloc)(size_t alloc_size, enum xt_memtype memtype)
static int dummy_instr_pop()
static const char filename[]
static int load_cuda_library(void)
static CUresult(* func_cuMemcpyDtoH)(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount)
static CUresult(* func_cuMemAlloc)(CUdeviceptr *dptr, size_t bytesize)
static void xt_cuda_free(void *ptr, enum xt_memtype memtype)
static CUresult(* func_cuPointerGetAttribute)(void *data, CUpointer_attribute attribute, CUdeviceptr ptr)
static CUresult(* func_cuGetErrorString)(CUresult error, const char **pStr)
static CUresult(* func_cuMemcpyDtoD)(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount)
static CUresult(* func_cuMemcpyHtoD)(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount)
#define CU_ERROR_CHECK(ret)
static int dummy_instr_push(char const *XT_UNUSED(name))
const struct xt_gpu_vtable * xt_cuda_init(void)
static void * xt_cuda_malloc(size_t alloc_size, enum xt_memtype memtype)
static struct xt_gpu_vtable const cuda_vtable
static void xt_cuda_memcpy(void *dst, void const *src, size_t buffer_size, enum xt_memtype dst_memtype, enum xt_memtype src_memtype)
static enum xt_memtype xt_cuda_get_memtype(const void *ptr)
static CUresult(* func_cuMemFree)(CUdeviceptr dptr)
routines for using CUDA in yaxt
routines for using GPU devices
#define XT_GPU_INSTR_PUSH(arg)