Texture dimensions can't be 0 怎么办

6544人阅读
=====================================================SDL源代码分析系列文章列表:=====================================================上一篇文章分析了SDL中创建渲染器的函数SDL_CreateRenderer()。这篇文章继续分析SDL的源代码。本文分析SDL的纹理(SDL_Texture)。SDL播放视频的代码流程如下所示。初始化:&SDL_Init(): 初始化SDL。&SDL_CreateWindow(): 创建窗口(Window)。&SDL_CreateRenderer(): 基于窗口创建渲染器(Render)。&SDL_CreateTexture(): 创建纹理(Texture)。&循环渲染数据:&SDL_UpdateTexture(): 设置纹理的数据。&SDL_RenderCopy(): 纹理复制给渲染器。&SDL_RenderPresent(): 显示。上篇文章分析了该流程中的第3个函数SDL_CreateRenderer()。本文继续分析该流程中的第4个函数SDL_CreateTexture()。SDL_TextureSDL_Texture结构定义了一个SDL中的纹理。如果直接使用SDL2编译好的SDK的话,是看不到SDL_Texture的内部结构的。有关它的定义在头文件中只有一行代码,如下所示。/**
\brief An efficient driver-specific representation of pixel data
struct SDL_T
typedef struct SDL_Texture SDL_T在源代码工程中可以看到SDL_Texture的定义,位于render\SDL_sysrender.h文件中。它的定义如下。/* Define the SDL texture structure */
struct SDL_Texture
const void *
/**& The pixel format of the texture */
/**& SDL_TextureAccess */
/**& The width of the texture */
/**& The height of the texture */
/**& The texture modulation mode */
SDL_BlendMode blendM
/**& The texture blend mode */
Uint8 r, g, b,
/**& Texture modulation values */
SDL_Renderer *
/* Support for formats not supported directly by the renderer */
SDL_Texture *
SDL_SW_YUVTexture *
SDL_Rect locked_
/**& Driver specific texture representation */
SDL_Texture *
SDL_Texture *
};可以看出其中包含了一个“纹理”所具备的各种属性。下面来看看如何创建这个SDL_Texture。SDL_CreateTexture()函数简介使用SDL_CreateTexture()基于渲染器创建一个纹理。SDL_CreateTexture()的原型如下。SDL_Texture * SDLCALL SDL_CreateTexture(SDL_Renderer * renderer,
Uint32 format,
int access, int w,
int h);参数的含义如下。
renderer:目标渲染器。
format:纹理的格式。后面会详述。
access:可以取以下值(定义位于SDL_TextureAccess中)
    SDL_TEXTUREACCESS_STATIC:变化极少
    SDL_TEXTUREACCESS_STREAMING:变化频繁
    SDL_TEXTUREACCESS_TARGET:暂时没有理解
w:纹理的宽
h:纹理的高
创建成功则返回所创建的纹理(SDL_Texture指针),失败返回NULL(即0)。
函数调用关系图
SDL_CreateTexture()关键函数的调用关系可以用下图表示。
上面的图片不太清晰,更清晰的图片上传到了相册里面:把相册里面的图片保存下来就可以得到清晰的图片了。
源代码分析
SDL_CreateTexture()的源代码位于render\SDL_render.c中。如下所示。
SDL_Texture * SDL_CreateTexture(SDL_Renderer * renderer, Uint32 format, int access, int w, int h)
SDL_Texture *
CHECK_RENDERER_MAGIC(renderer, NULL);
if (!format) {
format = renderer-&info.texture_formats[0];
if (SDL_ISPIXELFORMAT_INDEXED(format)) {
SDL_SetError(&Palettized textures are not supported&);
return NULL;
if (w &= 0 || h &= 0) {
SDL_SetError(&Texture dimensions can't be 0&);
return NULL;
if ((renderer-&info.max_texture_width && w & renderer-&info.max_texture_width) ||
(renderer-&info.max_texture_height && h & renderer-&info.max_texture_height)) {
SDL_SetError(&Texture dimensions are limited to %dx%d&, renderer-&info.max_texture_width, renderer-&info.max_texture_height);
return NULL;
texture = (SDL_Texture *) SDL_calloc(1, sizeof(*texture));
if (!texture) {
SDL_OutOfMemory();
return NULL;
texture-&magic = &texture_
texture-&format =
texture-&access =
texture-&w =
texture-&h =
texture-&r = 255;
texture-&g = 255;
texture-&b = 255;
texture-&a = 255;
texture-&renderer =
texture-&next = renderer-&
if (renderer-&textures) {
renderer-&textures-&prev =
renderer-&textures =
if (IsSupportedFormat(renderer, format)) {
if (renderer-&CreateTexture(renderer, texture) & 0) {
SDL_DestroyTexture(texture);
texture-&native = SDL_CreateTexture(renderer,
GetClosestSupportedFormat(renderer, format),
access, w, h);
if (!texture-&native) {
SDL_DestroyTexture(texture);
return NULL;
/* Swap textures to have texture before texture-&native in the list */
texture-&native-&next = texture-&
if (texture-&native-&next) {
texture-&native-&next-&prev = texture-&
texture-&prev = texture-&native-&
if (texture-&prev) {
texture-&prev-&next =
texture-&native-&prev =
texture-&next = texture-&
renderer-&textures =
if (SDL_ISPIXELFORMAT_FOURCC(texture-&format)) {
texture-&yuv = SDL_SW_CreateYUVTexture(format, w, h);
if (!texture-&yuv) {
SDL_DestroyTexture(texture);
return NULL;
} else if (access == SDL_TEXTUREACCESS_STREAMING) {
/* The pitch is 4 byte aligned */
texture-&pitch = (((w * SDL_BYTESPERPIXEL(format)) + 3) & ~3);
texture-&pixels = SDL_calloc(1, texture-&pitch * h);
if (!texture-&pixels) {
SDL_DestroyTexture(texture);
return NULL;
}从源代码中可以看出,SDL_CreateTexture()的大致流程如下。1. 检查输入参数的合理性。例如像素格式是否支持,宽和高是否小于等于0等等。2. 新建一个SDL_Texture。调用SDL_calloc()(实际上就是calloc())为新建的SDL_Texture分配内存。3. 调用SDL_Render的CreateTexture()方法创建纹理。这一步是整个函数的核心。下面我们详细看一下几种不同的渲染器的CreateTexture()的方法。1. Direct3DDirect3D 渲染器中对应CreateTexture()的函数是D3D_CreateTexture(),它的源代码如下所示(位于render\direct3d\SDL_render_d3d.c)。static int D3D_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
D3D_RenderData *renderdata = (D3D_RenderData *) renderer-&
D3D_TextureData *
data = (D3D_TextureData *) SDL_calloc(1, sizeof(*data));
if (!data) {
return SDL_OutOfMemory();
data-&scaleMode = GetScaleQuality();
texture-&driverdata =
#ifdef USE_DYNAMIC_TEXTURE
if (texture-&access == SDL_TEXTUREACCESS_STREAMING) {
pool = D3DPOOL_DEFAULT;
usage = D3DUSAGE_DYNAMIC;
if (texture-&access == SDL_TEXTUREACCESS_TARGET) {
/* D3DPOOL_MANAGED does not work with D3DUSAGE_RENDERTARGET */
pool = D3DPOOL_DEFAULT;
usage = D3DUSAGE_RENDERTARGET;
pool = D3DPOOL_MANAGED;
usage = 0;
IDirect3DDevice9_CreateTexture(renderdata-&device, texture-&w,
texture-&h, 1, usage,
PixelFormatToD3DFMT(texture-&format),
pool, &data-&texture, NULL);
if (FAILED(result)) {
return D3D_SetError(&CreateTexture()&, result);
if (texture-&format == SDL_PIXELFORMAT_YV12 ||
texture-&format == SDL_PIXELFORMAT_IYUV) {
data-&yuv = SDL_TRUE;
IDirect3DDevice9_CreateTexture(renderdata-&device, texture-&w / 2,
texture-&h / 2, 1, usage,
PixelFormatToD3DFMT(texture-&format),
pool, &data-&utexture, NULL);
if (FAILED(result)) {
return D3D_SetError(&CreateTexture()&, result);
IDirect3DDevice9_CreateTexture(renderdata-&device, texture-&w / 2,
texture-&h / 2, 1, usage,
PixelFormatToD3DFMT(texture-&format),
pool, &data-&vtexture, NULL);
if (FAILED(result)) {
return D3D_SetError(&CreateTexture()&, result);
}从代码中可以看出,该函数调用了Direct3D的API函数IDirect3DDevice9_CreateTexture()创建了一个纹理。2. OpenGLOpenGL渲染器中对应CreateTexture()的函数是GL_CreateTexture (),它的源代码如下所示(位于render\opengl\SDL_render_gl.c)。static int GL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
GL_RenderData *renderdata = (GL_RenderData *) renderer-&
GL_TextureData *
GLint internalF
GLenum format,
int texture_w, texture_h;
GLenum scaleM
GL_ActivateRenderer(renderer);
if (!convert_format(renderdata, texture-&format, &internalFormat,
&format, &type)) {
return SDL_SetError(&Texture format %s not supported by OpenGL&,
SDL_GetPixelFormatName(texture-&format));
data = (GL_TextureData *) SDL_calloc(1, sizeof(*data));
if (!data) {
return SDL_OutOfMemory();
if (texture-&access == SDL_TEXTUREACCESS_STREAMING) {
data-&pitch = texture-&w * SDL_BYTESPERPIXEL(texture-&format);
size = texture-&h * data-&
if (texture-&format == SDL_PIXELFORMAT_YV12 ||
texture-&format == SDL_PIXELFORMAT_IYUV) {
/* Need to add size for the U and V planes */
size += (2 * (texture-&h * data-&pitch) / 4);
data-&pixels = SDL_calloc(1, size);
if (!data-&pixels) {
SDL_free(data);
return SDL_OutOfMemory();
if (texture-&access == SDL_TEXTUREACCESS_TARGET) {
data-&fbo = GL_GetFBO(renderdata, texture-&w, texture-&h);
data-&fbo = NULL;
GL_CheckError(&&, renderer);
renderdata-&glGenTextures(1, &data-&texture);
if (GL_CheckError(&glGenTexures()&, renderer) & 0) {
SDL_free(data);
return -1;
texture-&driverdata =
if ((renderdata-&GL_ARB_texture_rectangle_supported)
/* && texture-&access != SDL_TEXTUREACCESS_TARGET */){
data-&type = GL_TEXTURE_RECTANGLE_ARB;
texture_w = texture-&w;
texture_h = texture-&h;
data-&texw = (GLfloat) texture_w;
data-&texh = (GLfloat) texture_h;
data-&type = GL_TEXTURE_2D;
texture_w = power_of_2(texture-&w);
texture_h = power_of_2(texture-&h);
data-&texw = (GLfloat) (texture-&w) / texture_w;
data-&texh = (GLfloat) texture-&h / texture_h;
data-&format =
data-&formattype =
scaleMode = GetScaleQuality();
renderdata-&glEnable(data-&type);
renderdata-&glBindTexture(data-&type, data-&texture);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_MIN_FILTER, scaleMode);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_MAG_FILTER, scaleMode);
/* According to the spec, CLAMP_TO_EDGE is the default for TEXTURE_RECTANGLE
and setting it causes an INVALID_ENUM error in the latest NVidia drivers.
if (data-&type != GL_TEXTURE_RECTANGLE_ARB) {
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_WRAP_S,
GL_CLAMP_TO_EDGE);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_WRAP_T,
GL_CLAMP_TO_EDGE);
#ifdef __MACOSX__
#ifndef GL_TEXTURE_STORAGE_HINT_APPLE
#define GL_TEXTURE_STORAGE_HINT_APPLE
#ifndef STORAGE_CACHED_APPLE
#define STORAGE_CACHED_APPLE
#ifndef STORAGE_SHARED_APPLE
#define STORAGE_SHARED_APPLE
if (texture-&access == SDL_TEXTUREACCESS_STREAMING) {
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_STORAGE_HINT_APPLE,
GL_STORAGE_SHARED_APPLE);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_STORAGE_HINT_APPLE,
GL_STORAGE_CACHED_APPLE);
if (texture-&access == SDL_TEXTUREACCESS_STREAMING
&& texture-&format == SDL_PIXELFORMAT_ARGB8888
&& (texture-&w % 8) == 0) {
renderdata-&glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
renderdata-&glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
renderdata-&glPixelStorei(GL_UNPACK_ROW_LENGTH,
(data-&pitch / SDL_BYTESPERPIXEL(texture-&format)));
renderdata-&glTexImage2D(data-&type, 0, internalFormat, texture_w,
texture_h, 0, format, type, data-&pixels);
renderdata-&glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
renderdata-&glTexImage2D(data-&type, 0, internalFormat, texture_w,
texture_h, 0, format, type, NULL);
renderdata-&glDisable(data-&type);
if (GL_CheckError(&glTexImage2D()&, renderer) & 0) {
return -1;
if (texture-&format == SDL_PIXELFORMAT_YV12 ||
texture-&format == SDL_PIXELFORMAT_IYUV) {
data-&yuv = SDL_TRUE;
renderdata-&glGenTextures(1, &data-&utexture);
renderdata-&glGenTextures(1, &data-&vtexture);
renderdata-&glEnable(data-&type);
renderdata-&glBindTexture(data-&type, data-&utexture);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_MIN_FILTER,
scaleMode);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_MAG_FILTER,
scaleMode);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_WRAP_S,
GL_CLAMP_TO_EDGE);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_WRAP_T,
GL_CLAMP_TO_EDGE);
renderdata-&glTexImage2D(data-&type, 0, internalFormat, texture_w/2,
texture_h/2, 0, format, type, NULL);
renderdata-&glBindTexture(data-&type, data-&vtexture);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_MIN_FILTER,
scaleMode);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_MAG_FILTER,
scaleMode);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_WRAP_S,
GL_CLAMP_TO_EDGE);
renderdata-&glTexParameteri(data-&type, GL_TEXTURE_WRAP_T,
GL_CLAMP_TO_EDGE);
renderdata-&glTexImage2D(data-&type, 0, internalFormat, texture_w/2,
texture_h/2, 0, format, type, NULL);
renderdata-&glDisable(data-&type);
return GL_CheckError(&&, renderer);
}从代码中可以看出,该函数调用了OpenGL的API函数glGenTextures(),glBindTexture()创建了一个纹理。并且使用glTexParameteri()设置了有关的一些参数。在这里有一点需要注意,在OpenGL渲染器中,如果输入像素格式是YV12或者IYUV,就会使用3个纹理。3. SoftwareSoftware渲染器中对应CreateTexture()的函数是SW_CreateTexture (),它的源代码如下所示(位于render\software\SDL_render_sw.c)。static int SW_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
Uint32 Rmask, Gmask, Bmask, A
if (!SDL_PixelFormatEnumToMasks
(texture-&format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) {
return SDL_SetError(&Unknown texture format&);
texture-&driverdata =
SDL_CreateRGBSurface(0, texture-&w, texture-&h, bpp, Rmask, Gmask,
Bmask, Amask);
SDL_SetSurfaceColorMod(texture-&driverdata, texture-&r, texture-&g,
texture-&b);
SDL_SetSurfaceAlphaMod(texture-&driverdata, texture-&a);
SDL_SetSurfaceBlendMode(texture-&driverdata, texture-&blendMode);
if (texture-&access == SDL_TEXTUREACCESS_STATIC) {
SDL_SetSurfaceRLE(texture-&driverdata, 1);
if (!texture-&driverdata) {
return -1;
}该函数的源代码还没有详细分析。可以看出其中调用了SDL_CreateRGBSurface()创建了“Surface”。
参考知识库
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
访问:5416786次
积分:49120
积分:49120
排名:第47名
原创:375篇
转载:159篇
译文:28篇
评论:5877条
姓名:雷霄骅
网名:leixiaohua1020
中国传媒大学-广播电视工程
中国传媒大学-数字电视技术
中国传媒大学-数字视频技术
[注1:QQ消息较多,难以一一回复,见谅]
[注2:CSDN私信功能使用很少,有问题可以直接在博客评论处留言]
主要从事与广播电视有关的视音频技术的研究。包括视音频质量评价,视音频编解码,流媒体,媒资检索等。
【SourceForge】【主】
【Github】
【开源中国】
欢迎转载本博客原创或翻译文章,但请声明出处,谢谢!
本QQ群旨在为视音频技术同行方便交流提供一个平台。无论是实验室,电视台,互联网视频,安防,播放器,媒体中心等等都可以加入讨论。欢迎新手和大牛,多交流可以更快的进步~1号群【2000人】:2号群【1000人】:通知:1号群成员容量为2000人,目前已经接近上限,为了给新成员入群讨论的机会,会定期清理不发言的用户,希望大家理解,谢谢支持! 2号群为新创建的群,欢迎加入~ 针对近期出现的各种问题,为保障本群和谐发展制定了《群规》,新成员入群后请阅读位于群公告中的《群规》了解本群的规则
文章:135篇
阅读:2488186
文章:91篇
阅读:692094
文章:41篇
阅读:256251
CUDA中Texture Memory的学习
今天完整的阅读了CUDA C Programming Guide中关于Texture Memory的几个部分。
重要的地方做了翻译。
同时,对CUDA Samples中的SimpleTexture作了修改测试,使符合我自己最后需要的应用方式。
在SobelFilter例程中,使用到了texture,准确来说是texture reference(而不是texture object,虽然两者区别并不是很大)。
为什么要使用texture是我看这一部分的主要原因,因为我考虑的是,host读取image后,copy到device端(存储在global memory中)。
那么kernel调用中就可以直接操作图像的各个像素了,为什么我还需要一个texture,它会更快么?而且在kernel中取一个像素点,还要进行Texture Fetch操作(我这里是用tex2D())。
所以搜索了一下,关于这个问题,stackoverflow上有人做了一些回答:
简单总结是这样的:
Texture Memory:
Cache optimized for 2D spatial access pattern
Reads have some advantages like address modes and interpolation that can be used at no extra cost
Global Memory:
Slow & uncached (1.0), cached (2.0)
Requires sequential & aligned 16 byte reads and writes to be fast (coalesced read/write)
Texture memory针对2D访问优化,且带有cache,而且在寻址模式(address mode)和插值方面都很有帮助。
Global memory较慢,在1.0上没有cache(2.0上有cache)。
但是,看起来,Texture memory的这些优势,除了cache之外,对于我的应用似乎并没有太大的好处。纠结之余,还是把文档看完了,把例程修改了,测试了一把。其实也不能说是测试,只是想看看,原始放进Texture的数据,读出来还是不是老样子。
文档中某些片段翻译记录如下,Texture Memory里面主要介绍Texture
Object和Texture Reference,其他内容如Texture Gather 、Layered
Texture就只是粗略看一下了。
3.2.10.1 Texture Memory中某段
read mode,可以设为cudaReadModeNormalizedFloat或者是cudaReadModeElementType。如果是前者,而且texel的类型为16bit或者8bit整型数,则由texture fetch函数返回的值,实际上是浮点数类型,而且无符号整型数的范围被映射到[0.0,1.0],有符号数被映射到[-1.0,1.0]。例如,值为0xff的无符号8bit texture元素,读出来是1。但如果采用的是后一种类型,则不会做任何转换。
另外一段:
Whether texture coordinates are normalized or not. By default, textures are referenced (by the functions of Texture Functions) using floating-point coordinates in the range [0, N-1] where N is the size of the texture in the dimension corresponding to the coordinate. For example, a texture that is 64x32 in size will be referenced with coordinates in the range [0, 63] and [0, 31] for the x and y dimensions, respectively. Normalized texture coordinates cause the coordinates to be specified in the range [0.0, 1.0-1/N] instead of [0, N-1].
3.2.10.1.2 Texture Reference API中某段
channelDesc describes the format of the texel; it must match the DataType argument of the texture reference declaration.
还有关于texture 绑定数据的说明:
在内核可以使用texture reference从texture memory中读取数据之前,texture reference必须被绑定到一个texture上:
使用cudaBindTexture()或者cudaBindTexture2D()绑定到线性存储器上,或者使用cudaBindTextureToArray()绑定到CUDA array上。cudaUnbindTexture()用于解绑。
建议分配二维的texture线性存储器时,使用cudaMallocPitch(),并且使用它返回的pitch值,作为参数给到cudaBindTexture2D()。
把texture绑定到texture reference时指定的格式,必须与声明texture reference时指定的参数一致,否则texture fetch的结果是未定义的。
还有个比较重要的:
Texture和surface内存是有缓存的,而且在同一个kernel调用中,这份缓存不会与global内存的写操作保持一致,所以对于在同一个kernel调用中已经被global write写过的地址,任何读操作都将返回未定义的值。换句话说,一个线程可以安全地读取某个texture内存位置,当且仅当这个内存地址是被以前的内核调用或者内存拷贝修改的;但如果它是被同一个内核调用中的同一个线程或者其它线程修改过的,读取就不安全了(结果未定义)。
测试代码,将CUDA的例程做了较大的修改,验证一下我的想法。
__global__ void
transformKernel_integer(unsigned char
*outputData,
int width,
int height
// calculate normalized texture coordinates
//计算归一化的纹理坐标,[0,1)
//感觉这个地方存在问题,blockIdx 和threadIdx均以0为开始,
//比如对于两者为0时候,则算得x=0,y=0;自然从tex中取得的数据就不行了
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
//简单的将texture里面的数据读取出来。
outputData[y*width + x] = tex2D(tex_integer, x,y);//
void runTest_integer(int
argc, char **argv)
int devID =
findCudaDevice(argc, (const char **) argv);
// load image from
//无符号整形
unsigned char *hData =
unsigned int width,
char *imagePath =
sdkFindFilePath(imageFilename, argv[0]);
if (imagePath ==
printf("Unable to source
image file: %s\n", imageFilename);
exit(EXIT_FAILURE);
sdkLoadPGM(imagePath,
&hData, &width, &height);
//采用 无符号整形
unsigned int size = width *
height * sizeof(unsigned char);
printf("Loaded '%s', %d x %d
pixels\n", imageFilename, width, height);
// Load reference image from
image (output)
//采用 无符号整形
unsigned char *hDataRef =
(unsigned char &*) malloc(size);
char *refPath =
sdkFindFilePath(refFilename, argv[0]);
if (refPath ==
printf("Unable to find
reference image file: %s\n", refFilename);
exit(EXIT_FAILURE);
sdkLoadPGM(refPath,
&hDataRef, &width, &height);
// Allocate device memory for
//采用 无符号整形
unsigned char *dData =
checkCudaErrors(cudaMalloc((void
**) &dData, size));
// Allocate array and copy
image data
//采用 无符号整形
//必须与texture
reference的DataType相同
//而且我们采用了unsigned char
的话,只需要8bit
cudaChannelFormatDesc
channelDesc =
cudaCreateChannelDesc(8, 0,
0, 0, cudaChannelFormatKindUnsigned);
checkCudaErrors(cudaMallocArray(&cuArray,
&channelDesc,
checkCudaErrors(cudaMemcpyToArray(cuArray,
cudaMemcpyHostToDevice));
// Set texture
parameters
//设置为wrap模式,一旦越界就为0
tex_integer.addressMode[0] =
cudaAddressModeW
tex_integer.addressMode[1] =
cudaAddressModeW
tex_integer.addressMode[2] =
cudaAddressModeW
//采用point模式,取最近值,而非二维插值
tex_integer.filterMode =
cudaFilterModeP
//使用非归一化的方式,
tex_integer.normalized =
// Bind the array to the
checkCudaErrors(cudaBindTextureToArray(tex_integer,
cuArray, channelDesc));
dim3 dimBlock(16, 16,
源代码里面,这个地方是错误的,修正如下
// dim3 dimGrid(width /
dimBlock.x, height / dimBlock.y, 1);
dim3 dimGrid(( width +
dimBlock.x - 1) / dimBlock.x,
( height + dimBlock.y -1) /
dimBlock.y, 1);
transformKernel_integer&&&&(dData,
width, height);
checkCudaErrors(cudaDeviceSynchronize());
StopWatchInterface *timer =
sdkCreateTimer(&timer);
sdkStartTimer(&timer);
// Execute the
transformKernel_integer&&&&(dData,
width, height);
// Check if kernel execution
generated an error
getLastCudaError("Kernel
execution failed");
checkCudaErrors(cudaDeviceSynchronize());
sdkStopTimer(&timer);
printf("Processing time: %f
(ms)\n", sdkGetTimerValue(&timer));
printf("%.2f
Mpixels/sec\n",
(width *height /
(sdkGetTimerValue(&timer) / 1000.0f)) / 1e6);
sdkDeleteTimer(&timer);
// Allocate mem for the
result on host side
unsigned char *hOutputData =
(unsigned char &*) malloc(size);
// copy result from device to
checkCudaErrors(cudaMemcpy(hOutputData,
cudaMemcpyDeviceToHost));
// Write result to
outputFilename[1024];
strcpy(outputFilename,
imagePath);
strcpy(outputFilename +
strlen(imagePath) - 4, "_integer_out.pgm");
sdkSavePGM(outputFilename,
hOutputData, width, height);
printf("Wrote '%s'\n",
outputFilename);
// Write regression file if
if (checkCmdLineFlag(argc,
(const char **) argv, "regression"))
// Write file for regression
//采用 无符号整形
sdkWriteFile("./data/regression.dat",
hOutputData,
width*height,
// We need to reload the data
from disk,
// because it is inverted
upon output
sdkLoadPGM(outputFilename,
&hOutputData, &width, &height);
printf("Comparing
files\n");
printf("\toutput:
已投稿到:
以上网友发言只代表其个人观点,不代表新浪网的观点或立场。

我要回帖

更多关于 you can be the hero 的文章

 

随机推荐