Cuda kernel fails on launch cause of kernel pararameters -


i made simple cuda kernel fails launch reason dont understand. below see global vars.

unsigned int volume[256*256*256];//contains volume data of source unsigned int target[256*256*256];//contains volume data of target unsigned int* d_volume=null;//source data on device unsigned int* d_target=null;//target data on device 

the next function kernel launcher.

void launch_kernel(){  cudamalloc(&d_volume,256*256*256*sizeof(unsigned int)); cudamemcpy(d_volume, volume, 256*256*256*sizeof(unsigned int),cudamemcpyhosttodevice); cudamalloc(&d_target,256*256*256*sizeof(unsigned int)); cudamemcpy(d_target, target, 256*256*256*sizeof(unsigned int),cudamemcpyhosttodevice); dim3 threads(256,1,1); dim3 blocks(256,256,1);  simple_kernel<<<blocks,threads>>>(d_volume,d_target); cudaerror_t cudaresult; cudaresult = cudagetlasterror(); if (cudaresult != cudasuccess) {     cout<<"kernel failed"<<endl; } cudamemcpy(volume, d_volume, 256*256*256*sizeof( int),cudamemcpydevicetohost); cudafree(d_volume); cudamemcpy(target, d_target 256*256*256*sizeof( int),cudamemcpydevicetohost); cudafree(d_target); } 

problem seems on d_target cause if launch kernel that:

simple_kernel<<<blocks,threads>>>(d_volume,d_volume); 

it working perfeclty(passes on device values must passed) , no message appears. idea why happen? kernel declaration follows below.

 __global__ void simple_kernel(unsigned int* src,unsigned int* tgt){ //i dont think matters for.          int x = threadidx.x;          int y = blockidx.x;          int z = blockidx.y;          if(x!=0 || x!=255 || y!=0 || y!=255 || z!=0 || z!=255  ){//in bound of memory allocated             if( src[x*256*256+y*256+z]==tgt[x*256*256+y*256+z])                 if(tgt[(x+1)*256*256+y*256+z]==1 || tgt[(x-1)*256*256+y*256+z]==1 || tgt[(x-1)*256*256+(y+1)*256+z] ||tgt[(x-1)*256*256+(y-1)*256+z])                     src[x*256*256+y*256+z]=1;                 else                     src[x*256*256+y*256+z]=0;          }      } 

cuda can return error in case of out-of-bounds read access global memory. perform out-of-bounds read access in:
if(tgt[(x+1)*256*256+y*256+z]==1 || ...) e.g. x = y = z = 255 go through out-of-bounds checking.

in case launch kernel
simple_kernel<<<blocks,threads>>>(d_volume,d_volume);
during out-of-bounds read access access global memory has been allocated d_target arrays d_volume , d_target stored consecutively, hence, no error occurs.

confirm opinion further error-checking or launch program cuda-memcheck.


Comments

Popular posts from this blog

ios - iPhone/iPad different view orientations in different views , and apple approval process -

java Extracting Zip file -

C# WinForm - loading screen -