程序员人生 网站导航

OpenCL之图片旋转的实现

栏目:框架设计时间:2015-05-13 08:51:11

  • 原理
  • 前期
  • kernel
  • 配置文件
  • 效果
  • 参考

这里做的就是使用OpenCL对图像旋转90度,也算是一个比较入门级别的程序。希望对大家有所帮助吧,多看几遍这些代码就熟悉了。

原理

图像旋转是指把定义的图像绕某一点以逆时针或顺时针方向旋转一定的角度,通常是指绕图像的中心以逆时针方向旋转。假定图像的左上角为(left, top),右下角为(right, bottom),则图像上任意点(x0, y0) 绕其中心(xcenter, ycenter) 逆时针旋转angle 角度后,新的坐标位置(x′, y′) 的计算公式为:

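公式:

$$x' = (x_0 - x_{center})\cos\theta - (y_0 - y_{center})\sin\theta + x_{center}$$

$$y' = (x_0 - x_{center})\sin\theta + (y_0 - y_{center})\cos\theta + y_{center}$$

其中 θ 即旋转角度 angle。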

前期

需要对图像进行处理,那么在这里介绍一个库给大家:FreeImage。
不熟悉的请看:请点这里。
使用这个库的方法:(通用方法,极有效)

  • 属性->C/C++->常规->附加包含目录:添加.h的路径。

  • 链接器->常规->附加库目录: 添加lib路径。

  • 链接器->输入->附加依赖项: 添加需要的lib名称。

  • 将dll文件放入exe路径下。

kernel

// Rotates a single-channel (8-bit) W x H image about its centre.
//
// Each work-item takes one source pixel (ix, iy), computes where it lands
// after the rotation and copies it there (forward mapping). Destination
// pixels that no source pixel maps to are left untouched, so the caller
// should clear dest_data beforehand.
//
//   src_data   input image, row-major, W*H bytes
//   dest_data  output image, row-major, W*H bytes
//   W, H       image width and height in pixels
//   sinTheta   sine of the rotation angle
//   cosTheta   cosine of the rotation angle
//
// No "#pragma OPENCL EXTENSION cl_amd_printf" here: the kernel never calls
// printf, and enabling a vendor extension breaks the build on other devices.
__kernel void image_rotate(
    __global uchar *src_data,
    __global uchar *dest_data,        // Data in global memory
    int W, int H,                     // Image dimensions
    float sinTheta, float cosTheta)   // Rotation parameters
{
    // Thread gets its index within the index space.
    const int ix = get_global_id(0);
    const int iy = get_global_id(1);

    // The host may round the global size up to a multiple of the work-group
    // size; surplus work-items must not touch memory.
    if (ix >= W || iy >= H)
        return;

    // Rotate about the true centre of the pixel grid. Using W/2, H/2 is off
    // by half a pixel for even sizes: a 90 degree turn would then push one
    // row out of the image and leave one destination column unwritten.
    const float xc = (W - 1) * 0.5f;
    const float yc = (H - 1) * 0.5f;
    const float dx = ix - xc;
    const float dy = iy - yc;

    // +0.5f so that the truncation below rounds to the nearest pixel.
    const float fx = dx * cosTheta - dy * sinTheta + xc + 0.5f;
    const float fy = dx * sinTheta + dy * cosTheta + yc + 0.5f;

    // Bounds are checked on the float values: a plain (int) cast truncates
    // toward zero and would turn positions in (-1, 0) into a valid index 0.
    if (fx >= 0.0f && fx < W && fy >= 0.0f && fy < H)
    {
        dest_data[(int)fy * W + (int)fx] = src_data[iy * W + ix];
    }
}

我们把这个东西和CPU串行处理比较一下,可以得到以下代码:

// Rotates a single-channel (8-bit) image on the CPU.
//
// Every source pixel is mapped forward to its rotated position about the
// image centre and copied there. Destination pixels that receive no source
// pixel are left untouched, so outbuf should be cleared by the caller.
//
//   inbuf     source image, row-major, w*h bytes
//   outbuf    destination image, row-major, w*h bytes
//   w, h      image width and height in pixels
//   sinTheta  sine of the rotation angle
//   cosTheta  cosine of the rotation angle
void cpu_rotate(unsigned char* inbuf, unsigned char* outbuf, int w, int h, float sinTheta, float cosTheta)
{
    // Rotate about the true centre of the pixel grid. Using w/2, h/2 is off
    // by half a pixel for even sizes: a 90 degree turn would then push one
    // row out of the image and leave one destination column unwritten.
    const float xc = (w - 1) * 0.5f;
    const float yc = (h - 1) * 0.5f;

    for (int row = 0; row < h; ++row)
    {
        const float dy = row - yc;
        for (int col = 0; col < w; ++col)
        {
            const float dx = col - xc;

            // +0.5f so that the truncation below rounds to the nearest pixel.
            const float fx = dx * cosTheta - dy * sinTheta + xc + 0.5f;
            const float fy = dx * sinTheta + dy * cosTheta + yc + 0.5f;

            // Bounds are checked on the float values: a plain int conversion
            // truncates toward zero and would turn positions in (-1, 0) into
            // a valid index 0. Written positively so that NaN is rejected.
            if (!(fx >= 0.0f && fx < w && fy >= 0.0f && fy < h))
                continue;

            outbuf[static_cast<int>(fy) * w + static_cast<int>(fx)] = inbuf[row * w + col];
        }
    }
}

对比以后我们发现,用OpenCL写kernel的时候循环没有了,取而代之的是通过get_global_id取得当前工作项的索引即可。

配置文件

这里还涉及到一些图片的操作,具体请看FreeImage的使用。

#include "stdafx.h"

#include <CL/cl.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#include "gFreeImage.h"

#define NWITEMS 4

#pragma comment (lib,"OpenCL.lib")
#pragma comment (lib,"FreeImage.lib")

// Reads the whole file `filename` into `s` (used to load the kernel source
// that is handed to the device compiler).
// Returns 0 on success and 1 if the file cannot be opened.
int convertToString(const char *filename, std::string& s)
{
    std::ifstream f(filename, std::ios::in | std::ios::binary);
    if (!f.is_open())
    {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    s.assign(std::istreambuf_iterator<char>(f), std::istreambuf_iterator<char>());
    return 0;
}

// Rotates a single-channel (8-bit) image on the CPU.
//
// Every source pixel is mapped forward to its rotated position about the
// image centre and copied there. Destination pixels that receive no source
// pixel are left untouched, so outbuf should be cleared by the caller.
//
//   inbuf     source image, row-major, w*h bytes
//   outbuf    destination image, row-major, w*h bytes
//   w, h      image width and height in pixels
//   sinTheta  sine of the rotation angle
//   cosTheta  cosine of the rotation angle
void cpu_rotate(unsigned char* inbuf, unsigned char* outbuf, int w, int h, float sinTheta, float cosTheta)
{
    // Rotate about the true centre of the pixel grid. Using w/2, h/2 is off
    // by half a pixel for even sizes: a 90 degree turn would then push one
    // row out of the image and leave one destination column unwritten.
    const float xc = (w - 1) * 0.5f;
    const float yc = (h - 1) * 0.5f;

    for (int row = 0; row < h; ++row)
    {
        const float dy = row - yc;
        for (int col = 0; col < w; ++col)
        {
            const float dx = col - xc;

            // +0.5f so that the truncation below rounds to the nearest pixel.
            const float fx = dx * cosTheta - dy * sinTheta + xc + 0.5f;
            const float fy = dx * sinTheta + dy * cosTheta + yc + 0.5f;

            // Bounds are checked on the float values: a plain int conversion
            // truncates toward zero and would turn positions in (-1, 0) into
            // a valid index 0. Written positively so that NaN is rejected.
            if (!(fx >= 0.0f && fx < w && fy >= 0.0f && fy < h))
                continue;

            outbuf[static_cast<int>(fy) * w + static_cast<int>(fx)] = inbuf[row * w + col];
        }
    }
}

// Owns the OpenCL handles created in main() and releases whichever of them
// were created, so every early return cleans up.
struct ClResources
{
    cl_context       context;
    cl_command_queue queue;
    cl_program       program;
    cl_kernel        kernel;
    cl_mem           d_ip;
    cl_mem           d_op;
    cl_event         ev;

    ClResources()
        : context(NULL), queue(NULL), program(NULL), kernel(NULL),
          d_ip(NULL), d_op(NULL), ev(NULL)
    {
    }

    ~ClResources()
    {
        if (ev)      clReleaseEvent(ev);
        if (kernel)  clReleaseKernel(kernel);
        if (program) clReleaseProgram(program);
        if (d_ip)    clReleaseMemObject(d_ip);
        if (d_op)    clReleaseMemObject(d_op);
        if (queue)   clReleaseCommandQueue(queue);
        if (context) clReleaseContext(context);
    }

private:
    ClResources(const ClResources&);
    ClResources& operator=(const ClResources&);
};

// Reports a failed OpenCL call; returns true when `status` is CL_SUCCESS.
static bool cl_ok(cl_int status, const char *what)
{
    if (status == CL_SUCCESS)
        return true;
    printf("%s failed: %d\n", what, status);
    return false;
}

// Loads lenna.jpg as a grey image, rotates it by 90 degrees once on the GPU
// (kernel "image_rotate" from rotate.cl) and once on the CPU, prints the
// kernel execution time, and saves both results.
// Returns 0 on success, -1 if the kernel fails to build, 1 on other errors.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    // Load the image.
    gFreeImage img;
    if (!img.LoadImageGrey("lenna.jpg"))
    {
        printf("装入lenna.jpg失败\n");
        return 1;
    }

    int W = 0;
    int H = 0;
    unsigned char *src_image = img.getImageDataGrey(W, H);
    if (src_image == NULL || W <= 0 || H <= 0)
    {
        printf("装入lenna.jpg失败\n");
        return 1;
    }

    const size_t mem_size = static_cast<size_t>(W) * static_cast<size_t>(H);

    // Zero-initialised: the rotation only writes pixels that some source
    // pixel maps to, so anything else must not be left as garbage.
    std::vector<unsigned char> cpu_image(mem_size, 0);
    std::vector<unsigned char> gpu_image(mem_size, 0);

    cl_int status = CL_SUCCESS;

    // Pick the first platform and its first GPU device.
    cl_platform_id platform = NULL;
    status = clGetPlatformIDs(1, &platform, NULL);
    if (!cl_ok(status, "clGetPlatformIDs"))
        return 1;

    cl_device_id device = NULL;
    status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    if (!cl_ok(status, "clGetDeviceIDs"))
        return 1;

    ClResources cl;

    // Create the context.
    cl.context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
    if (!cl_ok(status, "clCreateContext"))
        return 1;

    // Create the command queue; profiling is needed for the kernel timing.
    cl.queue = clCreateCommandQueue(cl.context, device, CL_QUEUE_PROFILING_ENABLE, &status);
    if (!cl_ok(status, "clCreateCommandQueue"))
        return 1;

    // Create the input and output buffers and upload the source image with
    // an explicit, blocking copy.
    cl.d_ip = clCreateBuffer(cl.context, CL_MEM_READ_ONLY, mem_size, NULL, &status);
    if (!cl_ok(status, "clCreateBuffer(input)"))
        return 1;

    cl.d_op = clCreateBuffer(cl.context, CL_MEM_WRITE_ONLY, mem_size, NULL, &status);
    if (!cl_ok(status, "clCreateBuffer(output)"))
        return 1;

    status = clEnqueueWriteBuffer(cl.queue, cl.d_ip, CL_TRUE, 0, mem_size,
                                  src_image, 0, NULL, NULL);
    if (!cl_ok(status, "clEnqueueWriteBuffer(input)"))
        return 1;

    // Clear the output buffer so unwritten pixels come back as black.
    status = clEnqueueWriteBuffer(cl.queue, cl.d_op, CL_TRUE, 0, mem_size,
                                  &gpu_image[0], 0, NULL, NULL);
    if (!cl_ok(status, "clEnqueueWriteBuffer(output)"))
        return 1;

    // Load the kernel source.
    const char *filename = "rotate.cl";
    std::string sourceStr;
    if (convertToString(filename, sourceStr) != 0)
        return 1;

    const char *source = sourceStr.c_str();
    const size_t sourceSize[] = { sourceStr.size() };

    // Create the program object.
    cl.program = clCreateProgramWithSource(cl.context, 1, &source, sourceSize, &status);
    if (!cl_ok(status, "clCreateProgramWithSource"))
        return 1;

    // Build the program; on failure print the compiler log.
    status = clBuildProgram(cl.program, 1, &device, NULL, NULL, NULL);
    if (status != CL_SUCCESS)
    {
        printf("clBuild failed:%d\n", status);

        size_t log_size = 0;
        clGetProgramBuildInfo(cl.program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
        std::vector<char> build_log(log_size + 1, '\0');
        clGetProgramBuildInfo(cl.program, device, CL_PROGRAM_BUILD_LOG,
                              log_size, &build_log[0], NULL);
        printf("\n%s\n", &build_log[0]);
        return -1;
    }

    // Create the kernel object; "image_rotate" is the kernel entry point.
    cl.kernel = clCreateKernel(cl.program, "image_rotate", &status);
    if (!cl_ok(status, "clCreateKernel"))
        return 1;

    // Set the kernel arguments: sin/cos of 90 degrees.
    float sintheta = 1;
    float costheta = 0;
    status  = clSetKernelArg(cl.kernel, 0, sizeof(cl_mem), (void *)&cl.d_ip);
    status |= clSetKernelArg(cl.kernel, 1, sizeof(cl_mem), (void *)&cl.d_op);
    status |= clSetKernelArg(cl.kernel, 2, sizeof(cl_int), (void *)&W);
    status |= clSetKernelArg(cl.kernel, 3, sizeof(cl_int), (void *)&H);
    status |= clSetKernelArg(cl.kernel, 4, sizeof(cl_float), (void *)&sintheta);
    status |= clSetKernelArg(cl.kernel, 5, sizeof(cl_float), (void *)&costheta);
    if (!cl_ok(status, "clSetKernelArg"))
        return 1;

    // Set local and global work sizes. A 16x16 work-group is only legal when
    // it tiles the image exactly; otherwise let the runtime choose.
    const size_t localws[2]  = { 16, 16 };
    const size_t globalws[2] = { static_cast<size_t>(W), static_cast<size_t>(H) };
    const bool tiles_evenly  = (globalws[0] % localws[0] == 0) &&
                               (globalws[1] % localws[1] == 0);

    // Run the kernel.
    status = clEnqueueNDRangeKernel(cl.queue, cl.kernel, 2, NULL, globalws,
                                    tiles_evenly ? localws : NULL, 0, NULL, &cl.ev);
    if (!cl_ok(status, "clEnqueueNDRangeKernel"))
        return 1;

    status = clFinish(cl.queue);
    if (!cl_ok(status, "clFinish"))
        return 1;

    // Compute the kernel execution time from the profiling counters.
    cl_ulong startTime = 0;
    cl_ulong endTime = 0;
    status  = clGetEventProfilingInfo(cl.ev, CL_PROFILING_COMMAND_START,
                                      sizeof(cl_ulong), &startTime, NULL);
    status |= clGetEventProfilingInfo(cl.ev, CL_PROFILING_COMMAND_END,
                                      sizeof(cl_ulong), &endTime, NULL);
    if (cl_ok(status, "clGetEventProfilingInfo"))
    {
        cl_ulong kernelExecTimeNs = endTime - startTime;
        printf("kernal exec time :%8.6f ms\n", kernelExecTimeNs * 1e-6);
    }

    // Copy the result from the device back to host memory: map, copy out,
    // and unmap again so the mapping is not leaked.
    unsigned char *op_data = (cl_uchar *)clEnqueueMapBuffer(
        cl.queue, cl.d_op, CL_TRUE, CL_MAP_READ, 0, mem_size, 0, NULL, NULL, &status);
    if (!cl_ok(status, "clEnqueueMapBuffer") || op_data == NULL)
        return 1;

    memcpy(&gpu_image[0], op_data, mem_size);

    status = clEnqueueUnmapMemObject(cl.queue, cl.d_op, op_data, 0, NULL, NULL);
    if (!cl_ok(status, "clEnqueueUnmapMemObject"))
        return 1;
    clFinish(cl.queue);

    // CPU reference rotation, computed from the still-unmodified source.
    cpu_rotate(src_image, &cpu_image[0], W, H, sintheta, costheta);

    // The image object saves whatever is in its own pixel buffer, so each
    // result is copied into src_image before saving.
    memcpy(src_image, &cpu_image[0], mem_size);
    img.SaveImage("cpu_lenna_rotate.jpg");

    memcpy(src_image, &gpu_image[0], mem_size);
    img.SaveImage("lenna_rotate.jpg");

    // The OpenCL objects are released by ClResources' destructor.
    return 0;
}

效果

最初的图片:

月球

用OpenCL处理以后:用了灰度图。

OpenCL

用CPU处理以后:用了灰度图。

OpenCL

参考

FreeImage下载,请点击这里。

参考代码,请点击这里。

------分隔线----------------------------
------分隔线----------------------------

最新技术推荐