1***@lnmiit.ac.in
2017-11-05 08:28:46 UTC
I am getting this error when using a simple dense-layer model or a CNN model
with Keras. Please help
GpuArrayException: ('The following error happened while compiling the
node', GpuAdvancedIncSubtensor1_dev20{inplace=True,
set_instead_of_inc=False}(GpuAlloc<None>{memset_0=True}.0, GpuReshape{2}.0,
GpuReshape{1}.0), '\n', u'CUDA kernel compile failure ::\n0001\t#define
local_barrier() __syncthreads()\n0002\t#define WITHIN_KERNEL extern "C"
__device__\n0003\t#define KERNEL extern "C" __global__\n0004\t#define
GLOBAL_MEM /* empty */\n0005\t#define LOCAL_MEM __shared__\n0006\t#define
LOCAL_MEM_ARG /* empty */\n0007\t#ifdef NAN\n0008\t#undef
NAN\n0009\t#endif\n0010\t#define NAN
__int_as_float(0x7fffffff)\n0011\t#ifdef INFINITY\n0012\t#undef
INFINITY\n0013\t#endif\n0014\t#define INFINITY
__int_as_float(0x7f800000)\n0015\t#define LID_0 threadIdx.x\n0016\t#define
LID_1 threadIdx.y\n0017\t#define LID_2 threadIdx.z\n0018\t#define LDIM_0
blockDim.x\n0019\t#define LDIM_1 blockDim.y\n0020\t#define LDIM_2
blockDim.z\n0021\t#define GID_0 blockIdx.x\n0022\t#define GID_1
blockIdx.y\n0023\t#define GID_2 blockIdx.z\n0024\t#define GDIM_0
gridDim.x\n0025\t#define GDIM_1 gridDim.y\n0026\t#define GDIM_2
gridDim.z\n0027\t#define ga_bool unsigned char\n0028\t#define ga_byte
signed char\n0029\t#define ga_ubyte unsigned char\n0030\t#define ga_short
short\n0031\t#define ga_ushort unsigned short\n0032\t#define ga_int
int\n0033\t#define ga_uint unsigned int\n0034\t#define ga_long long
long\n0035\t#define ga_ulong unsigned long long\n0036\t#define ga_float
float\n0037\t#define ga_double double\n0038\t#define ga_half
ga_ushort\n0039\t#define ga_size size_t\n0040\t#define ga_ssize
ptrdiff_t\n0041\t#define load_half(p) __half2float(*(p))\n0042\t#define
store_half(p, v) (*(p) = __float2half_rn(v))\n0043\t#define
GA_DECL_SHARED_PARAM(type, name)\n0044\t#define GA_DECL_SHARED_BODY(type,
name) extern __shared__ type name[];\n0045\t#define GA_WARP_SIZE
warpSize\n0046\t#line 1\n0047\t\n0048\t/*\n0049\t * This is an atomicAdd
that works for doubles since that is not provided\n0050\t * natively by
cuda.\n0051\t */\n0052\t__device__ ga_double atomicAdd(ga_double* address,
ga_double val) {\n0053\t unsigned long long int* address_as_ull
=\n0054\t (unsigned long long
int*)address;\n0055\t unsigned long long int old = *address_as_ull,
assumed;\n0056\t do {\n0057\t assumed = old;\n0058\t old =
atomicCAS(address_as_ull, assumed,\n0059\t
__double_as_longlong(val +\n0060\t
__longlong_as_double(assumed)));\n0061\t } while (assumed !=
old);\n0062\t return
__longlong_as_double(old);\n0063\t}\n0064\t\n0065\t__device__ ga_double
atomicExch(ga_double *address, ga_double val) {\n0066\t return
atomicExch((unsigned long long int *)address,\n0067\t
__double_as_longlong(val));\n0068\t}\n0069\t\n0070\t/*\n0071\t * This is a
version of atomicAdd that works for half-floats. It may\n0072\t * read and
write 2 bytes more than the size of the array if the array\n0073\t * has an
uneven number of elements. The actual value at that spot\n0074\t * will
not be modified.\n0075\t */\n0076\t\n0077\t__device__ ga_half
atomicAdd(ga_half *addr, ga_half val) {\n0078\t ga_uint *base = (ga_uint
*)((ga_size)addr & ~2);\n0079\t ga_uint old, assumed, sum, new_;\n0080\t
old = *base;\n0081\t do {\n0082\t assumed = old;\n0083\t sum =
__float2half_rn(\n0084\t __half2float(val) +\n0085\t
__half2float((ga_half)__byte_perm(old, 0,\n0086\t
((ga_size)addr & 2) ? 0x4432 : 0x4410)));\n0087\t new_ =
__byte_perm(old, sum, ((ga_size)addr & 2) ? 0x5410 : 0x3254);\n0088\t
old = atomicCAS(base, assumed, new_);\n0089\t } while (assumed !=
old);\n0090\t return (ga_half)__byte_perm(old, 0,\n0091\t
((ga_size)addr & 2) ? 0x4432 :
0x4410);\n0092\t}\n0093\t\n0094\t__device__ ga_half atomicExch(ga_half
*addr, ga_half val) {\n0095\t ga_uint *base = (ga_uint *)((ga_size)addr &
~2);\n0096\t ga_uint old, assumed, new_;\n0097\t old = *base;\n0098\t do
{\n0099\t assumed = old;\n0100\t new_ = __byte_perm(old, val,
((ga_size)addr & 2) ? 0x5410 : 0x3254);\n0101\t old = atomicCAS(base,
assumed, new_);\n0102\t } while (assumed != old);\n0103\t return
(ga_half)__byte_perm(old, 0,\n0104\t
((ga_size)addr & 2) ? 0x4432 : 0x4410);\n0105\t}\n0106\t\n0107\t
KERNEL void k_vector_add_fast(const ga_size numRowsX,\n0108\t
const ga_size numColsX,\n0109\t
const ga_ssize stridesX0,\n0110\t
const ga_ssize stridesX1,\n0111\t
ga_float *X,\n0112\t const
ga_size offset_X,\n0113\t const
ga_size numRowsY,\n0114\t const
ga_size numColsY,\n0115\t const
ga_ssize stridesY0,\n0116\t const
ga_ssize stridesY1,\n0117\t ga_float
*Y,\n0118\t const ga_size
offset_Y,\n0119\t const ga_size
numIndices,\n0120\t const ga_ssize
stridesIndices,\n0121\t ga_int
*indices_arr,\n0122\t const ga_size
offset_indices_arr,\n0123\t const int
set_instead_of_inc,\n0124\t ga_int
*err)\n0125\t {\n0126\t X = (ga_float *)(((char
*)X)+offset_X);\n0127\t Y = (ga_float *)(((char
*)Y)+offset_Y);\n0128\t indices_arr = (ga_int *)(((char
*)indices_arr)+offset_indices_arr);\n0129\t for (int i =
(blockIdx.x); i < numIndices; i += gridDim.x)\n0130\t
{\n0131\t for(int j = (threadIdx.x); j < numColsX;j +=
blockDim.x)\n0132\t {\n0133\t
ga_ssize x_row = indices_arr[i * stridesIndices];\n0134\t
if (x_row < 0)\n0135\t x_row +=
numRowsX;\n0136\t ga_ssize y_row = i;\n0137\t
if (x_row < numRowsX && x_row >= 0) {\n0138\t
if (set_instead_of_inc) {\n0139\t
atomicExch(&X[(x_row * stridesX0) + (j * stridesX1)],\n0140\t
Y[(y_row * stridesY0) + (j * stridesY1)]);\n0141\t
} else {\n0142\t
atomicAdd(&X[(x_row * stridesX0) + (j * stridesX1)],\n0143\t
Y[(y_row * stridesY0) + (j * stridesY1)]);\n0144\t
}\n0145\t } else {\n0146\t
*err = 1;\n0147\t }\n0148\t
}\n0149\t }\n0150\t return;\n0151\t
}\n0152\t \n\nCompile log:\nNVRTC compile
log::\ndefault_program(38): error: identifier "__half2float" is
undefined\n\ndefault_program(37): error: identifier "__float2half_rn" is
undefined\n\n2 errors detected in the compilation of
"default_program".\n\n')
using keras. Please help
GpuArrayException: ('The following error happened while compiling the
node', GpuAdvancedIncSubtensor1_dev20{inplace=True,
set_instead_of_inc=False}(GpuAlloc<None>{memset_0=True}.0, GpuReshape{2}.0,
GpuReshape{1}.0), '\n', u'CUDA kernel compile failure ::\n0001\t#define
local_barrier() __syncthreads()\n0002\t#define WITHIN_KERNEL extern "C"
__device__\n0003\t#define KERNEL extern "C" __global__\n0004\t#define
GLOBAL_MEM /* empty */\n0005\t#define LOCAL_MEM __shared__\n0006\t#define
LOCAL_MEM_ARG /* empty */\n0007\t#ifdef NAN\n0008\t#undef
NAN\n0009\t#endif\n0010\t#define NAN
__int_as_float(0x7fffffff)\n0011\t#ifdef INFINITY\n0012\t#undef
INFINITY\n0013\t#endif\n0014\t#define INFINITY
__int_as_float(0x7f800000)\n0015\t#define LID_0 threadIdx.x\n0016\t#define
LID_1 threadIdx.y\n0017\t#define LID_2 threadIdx.z\n0018\t#define LDIM_0
blockDim.x\n0019\t#define LDIM_1 blockDim.y\n0020\t#define LDIM_2
blockDim.z\n0021\t#define GID_0 blockIdx.x\n0022\t#define GID_1
blockIdx.y\n0023\t#define GID_2 blockIdx.z\n0024\t#define GDIM_0
gridDim.x\n0025\t#define GDIM_1 gridDim.y\n0026\t#define GDIM_2
gridDim.z\n0027\t#define ga_bool unsigned char\n0028\t#define ga_byte
signed char\n0029\t#define ga_ubyte unsigned char\n0030\t#define ga_short
short\n0031\t#define ga_ushort unsigned short\n0032\t#define ga_int
int\n0033\t#define ga_uint unsigned int\n0034\t#define ga_long long
long\n0035\t#define ga_ulong unsigned long long\n0036\t#define ga_float
float\n0037\t#define ga_double double\n0038\t#define ga_half
ga_ushort\n0039\t#define ga_size size_t\n0040\t#define ga_ssize
ptrdiff_t\n0041\t#define load_half(p) __half2float(*(p))\n0042\t#define
store_half(p, v) (*(p) = __float2half_rn(v))\n0043\t#define
GA_DECL_SHARED_PARAM(type, name)\n0044\t#define GA_DECL_SHARED_BODY(type,
name) extern __shared__ type name[];\n0045\t#define GA_WARP_SIZE
warpSize\n0046\t#line 1\n0047\t\n0048\t/*\n0049\t * This is an atomicAdd
that works for doubles since that is not provided\n0050\t * natively by
cuda.\n0051\t */\n0052\t__device__ ga_double atomicAdd(ga_double* address,
ga_double val) {\n0053\t unsigned long long int* address_as_ull
=\n0054\t (unsigned long long
int*)address;\n0055\t unsigned long long int old = *address_as_ull,
assumed;\n0056\t do {\n0057\t assumed = old;\n0058\t old =
atomicCAS(address_as_ull, assumed,\n0059\t
__double_as_longlong(val +\n0060\t
__longlong_as_double(assumed)));\n0061\t } while (assumed !=
old);\n0062\t return
__longlong_as_double(old);\n0063\t}\n0064\t\n0065\t__device__ ga_double
atomicExch(ga_double *address, ga_double val) {\n0066\t return
atomicExch((unsigned long long int *)address,\n0067\t
__double_as_longlong(val));\n0068\t}\n0069\t\n0070\t/*\n0071\t * This is a
version of atomicAdd that works for half-floats. It may\n0072\t * read and
write 2 bytes more than the size of the array if the array\n0073\t * has an
uneven number of elements. The actual value at that spot\n0074\t * will
not be modified.\n0075\t */\n0076\t\n0077\t__device__ ga_half
atomicAdd(ga_half *addr, ga_half val) {\n0078\t ga_uint *base = (ga_uint
*)((ga_size)addr & ~2);\n0079\t ga_uint old, assumed, sum, new_;\n0080\t
old = *base;\n0081\t do {\n0082\t assumed = old;\n0083\t sum =
__float2half_rn(\n0084\t __half2float(val) +\n0085\t
__half2float((ga_half)__byte_perm(old, 0,\n0086\t
((ga_size)addr & 2) ? 0x4432 : 0x4410)));\n0087\t new_ =
__byte_perm(old, sum, ((ga_size)addr & 2) ? 0x5410 : 0x3254);\n0088\t
old = atomicCAS(base, assumed, new_);\n0089\t } while (assumed !=
old);\n0090\t return (ga_half)__byte_perm(old, 0,\n0091\t
((ga_size)addr & 2) ? 0x4432 :
0x4410);\n0092\t}\n0093\t\n0094\t__device__ ga_half atomicExch(ga_half
*addr, ga_half val) {\n0095\t ga_uint *base = (ga_uint *)((ga_size)addr &
~2);\n0096\t ga_uint old, assumed, new_;\n0097\t old = *base;\n0098\t do
{\n0099\t assumed = old;\n0100\t new_ = __byte_perm(old, val,
((ga_size)addr & 2) ? 0x5410 : 0x3254);\n0101\t old = atomicCAS(base,
assumed, new_);\n0102\t } while (assumed != old);\n0103\t return
(ga_half)__byte_perm(old, 0,\n0104\t
((ga_size)addr & 2) ? 0x4432 : 0x4410);\n0105\t}\n0106\t\n0107\t
KERNEL void k_vector_add_fast(const ga_size numRowsX,\n0108\t
const ga_size numColsX,\n0109\t
const ga_ssize stridesX0,\n0110\t
const ga_ssize stridesX1,\n0111\t
ga_float *X,\n0112\t const
ga_size offset_X,\n0113\t const
ga_size numRowsY,\n0114\t const
ga_size numColsY,\n0115\t const
ga_ssize stridesY0,\n0116\t const
ga_ssize stridesY1,\n0117\t ga_float
*Y,\n0118\t const ga_size
offset_Y,\n0119\t const ga_size
numIndices,\n0120\t const ga_ssize
stridesIndices,\n0121\t ga_int
*indices_arr,\n0122\t const ga_size
offset_indices_arr,\n0123\t const int
set_instead_of_inc,\n0124\t ga_int
*err)\n0125\t {\n0126\t X = (ga_float *)(((char
*)X)+offset_X);\n0127\t Y = (ga_float *)(((char
*)Y)+offset_Y);\n0128\t indices_arr = (ga_int *)(((char
*)indices_arr)+offset_indices_arr);\n0129\t for (int i =
(blockIdx.x); i < numIndices; i += gridDim.x)\n0130\t
{\n0131\t for(int j = (threadIdx.x); j < numColsX;j +=
blockDim.x)\n0132\t {\n0133\t
ga_ssize x_row = indices_arr[i * stridesIndices];\n0134\t
if (x_row < 0)\n0135\t x_row +=
numRowsX;\n0136\t ga_ssize y_row = i;\n0137\t
if (x_row < numRowsX && x_row >= 0) {\n0138\t
if (set_instead_of_inc) {\n0139\t
atomicExch(&X[(x_row * stridesX0) + (j * stridesX1)],\n0140\t
Y[(y_row * stridesY0) + (j * stridesY1)]);\n0141\t
} else {\n0142\t
atomicAdd(&X[(x_row * stridesX0) + (j * stridesX1)],\n0143\t
Y[(y_row * stridesY0) + (j * stridesY1)]);\n0144\t
}\n0145\t } else {\n0146\t
*err = 1;\n0147\t }\n0148\t
}\n0149\t }\n0150\t return;\n0151\t
}\n0152\t \n\nCompile log:\nNVRTC compile
log::\ndefault_program(38): error: identifier "__half2float" is
undefined\n\ndefault_program(37): error: identifier "__float2half_rn" is
undefined\n\n2 errors detected in the compilation of
"default_program".\n\n')
--
---
You received this message because you are subscribed to the Google Groups "theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to theano-users+***@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
---
You received this message because you are subscribed to the Google Groups "theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to theano-users+***@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.