日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 >内容正文

编程问答

基于OpenCL的mean filter性能

發布時間:2023/12/18 编程问答 31 豆豆
生活随笔 收集整理的這篇文章主要介紹了 基于OpenCL的mean filter性能 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

1.對于一個標準的3*3 均值濾波,kernel代碼如下:

使用buffer/image緩沖對象

/*
 * Mean (box) filter over a buffer of 4-channel 8-bit pixels.
 *
 * inputImage  - source pixels, addressed as x + y * width
 * outputImage - destination pixels, same addressing
 * N           - requested window side length; the radius is k = (N - 1) / 2
 *
 * The kernel treats the global work size as the image width and height, so
 * it must be launched with a 2D NDRange equal to the image dimensions.
 * Pixels closer than k to any edge are copied through unfiltered.
 */
__kernel void filter(__global uchar4* inputImage, __global uchar4* outputImage, uint N)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int width = get_global_size(0);
    const int height = get_global_size(1);
    const int k = (N - 1) / 2;          /* window radius */
    const int center = x + y * width;

    /* The window would reach outside the image: copy the pixel unchanged. */
    if (x < k || y < k || x > width - k - 1 || y > height - k - 1) {
        outputImage[center] = inputImage[center];
        return;
    }

    /*
     * The loops below visit (2k+1) x (2k+1) pixels. Divide by that count
     * rather than N*N: the two are equal for odd N, but for even N the
     * window is smaller than N*N and dividing by N*N would darken the result.
     */
    const uint side = (uint)(2 * k + 1);
    const uint count = side * side;

    /* Accumulate in 32 bits per channel so the 8-bit inputs cannot overflow. */
    uint4 sum = (uint4)(0);
    for (int j = y - k; j <= y + k; j++) {
        for (int i = x - k; i <= x + k; i++) {
            sum += convert_uint4(inputImage[i + j * width]);
        }
    }

    outputImage[center] = convert_uchar4(sum / count);
}
/*
 * Mean (box) filter using image objects instead of plain buffers.
 *
 * inputImage  - source image, read with read_imageui
 * outputImage - destination image, written with write_imageui
 * N           - requested window side length; the radius is k = (N - 1) / 2
 *
 * The kernel treats the global work size as the image width and height.
 * Pixels closer than k to any edge are copied through unfiltered.
 *
 * NOTE(review): imageSampler is not declared in this kernel; it must be a
 * program-scope sampler defined elsewhere in the .cl source - confirm.
 */
__kernel void filterImg( image2d_t inputImage, __write_only image2d_t outputImage, uint N)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int width = get_global_size(0);
    const int height = get_global_size(1);
    const int k = (N - 1) / 2;          /* window radius */
    const int2 pos = (int2)(x, y);

    /* The window would reach outside the image: copy the texel unchanged. */
    if (x < k || y < k || x > width - k - 1 || y > height - k - 1) {
        /* Only the border path needs the centre texel on its own. */
        write_imageui(outputImage, pos, read_imageui(inputImage, imageSampler, pos));
        return;
    }

    /*
     * The loops below visit (2k+1) x (2k+1) texels. Divide by that count
     * rather than N*N: the two are equal for odd N, but for even N the
     * window is smaller than N*N and dividing by N*N would darken the result.
     */
    const uint side = (uint)(2 * k + 1);
    const uint count = side * side;

    uint4 sum = (uint4)(0);
    for (int j = y - k; j <= y + k; j++) {
        for (int i = x - k; i <= x + k; i++) {
            sum += read_imageui(inputImage, imageSampler, (int2)(i, j));
        }
    }

    write_imageui(outputImage, pos, sum / count);
}

對一個2048*2048的圖像執行filter操作,

?

global work size = {2048, 2048, 1}, group work size = {16, 16}(即每個work-group有256個work-item)。一般group work size應該為64的倍數,因為對于AMD顯卡,wavefront(64個work-item)是基本的硬件線程調度單位。

使用了6個GPRs,沒有使用ScratchRegs。ScratchRegs是指用video memory來模擬GPR,這樣線程執行的速度會大大降低,應盡量減少ScratchRegs的數量。

可以看到,使用image對象kernel執行時間要短,但奇怪的是各項性能參數都是buffer對象領先,除了alu busy和alu指令數目。

改為下面的kernel代碼(把3*3窗口的循環手動展開),性能會有所提高:

?

/*
 * 3x3 mean filter over a buffer of 4-channel 8-bit pixels, with the window
 * loop written out by hand.
 *
 * inputImage  - source pixels, addressed as x + y * width
 * outputImage - destination pixels, same addressing
 * N           - unused; kept so the argument list matches the generic kernel
 *
 * The kernel treats the global work size as the image width and height.
 * The outermost one-pixel frame is copied through unfiltered.
 */
__kernel void filter(__global uchar4* inputImage, __global uchar4* outputImage, uint N)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int width = get_global_size(0);
    const int height = get_global_size(1);

    /* Offsets of the first pixel of the rows above, at and below y. */
    const int rowAbove = (y - 1) * width;
    const int rowHere = y * width;
    const int rowBelow = (y + 1) * width;

    if (x < 1 || y < 1 || x > width - 2 || y > height - 2) {
        outputImage[x + rowHere] = inputImage[x + rowHere];
        return;
    }

    /* Accumulate in 32 bits per channel so the 8-bit inputs cannot overflow. */
    uint4 sum = (uint4)(0);
    sum += convert_uint4(inputImage[x - 1 + rowAbove]);
    sum += convert_uint4(inputImage[x     + rowAbove]);
    sum += convert_uint4(inputImage[x + 1 + rowAbove]);
    sum += convert_uint4(inputImage[x - 1 + rowHere]);
    sum += convert_uint4(inputImage[x     + rowHere]);
    sum += convert_uint4(inputImage[x + 1 + rowHere]);
    sum += convert_uint4(inputImage[x - 1 + rowBelow]);
    sum += convert_uint4(inputImage[x     + rowBelow]);
    sum += convert_uint4(inputImage[x + 1 + rowBelow]);

    outputImage[x + rowHere] = convert_uchar4(sum / 9);
}

/*
 * Generic mean filter over a buffer of 4-channel 8-bit pixels.
 *
 * inputImage  - source pixels, addressed as x + y * width
 * outputImage - destination pixels, same addressing
 * N           - requested window side length; the radius is k = (N - 1) / 2
 *
 * The kernel treats the global work size as the image width and height.
 * Pixels closer than k to any edge are copied through unfiltered.
 */
__kernel void filter1(__global uchar4* inputImage, __global uchar4* outputImage, uint N)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int width = get_global_size(0);
    const int height = get_global_size(1);
    const int k = (N - 1) / 2;          /* window radius */
    const int center = x + y * width;

    /* The window would reach outside the image: copy the pixel unchanged. */
    if (x < k || y < k || x > width - k - 1 || y > height - k - 1) {
        outputImage[center] = inputImage[center];
        return;
    }

    /*
     * The loops below visit (2k+1) x (2k+1) pixels. Divide by that count
     * rather than N*N: the two are equal for odd N, but for even N the
     * window is smaller than N*N and dividing by N*N would darken the result.
     */
    const uint side = (uint)(2 * k + 1);
    const uint count = side * side;

    uint4 sum = (uint4)(0);
    for (int j = y - k; j <= y + k; j++) {
        for (int i = x - k; i <= x + k; i++) {
            sum += convert_uint4(inputImage[i + j * width]);
        }
    }

    outputImage[center] = convert_uchar4(sum / count);
}

/*
 * 3x3 mean filter using image objects, with the window loop written out by
 * hand.
 *
 * inputImage  - source image, read with read_imageui
 * outputImage - destination image, written with write_imageui
 * N           - unused; kept so the argument list matches the generic kernel
 *
 * The kernel treats the global work size as the image width and height.
 * The outermost one-texel frame is copied through unfiltered.
 *
 * NOTE(review): imageSampler is not declared in this kernel; it must be a
 * program-scope sampler defined elsewhere in the .cl source - confirm.
 */
__kernel void filterImg( image2d_t inputImage, __write_only image2d_t outputImage, uint N)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int width = get_global_size(0);
    const int height = get_global_size(1);
    const int2 pos = (int2)(x, y);

    if (x < 1 || y < 1 || x > width - 2 || y > height - 2) {
        /* Only the border path needs the centre texel on its own. */
        write_imageui(outputImage, pos, read_imageui(inputImage, imageSampler, pos));
        return;
    }

    uint4 sum = (uint4)(0);
    sum += read_imageui(inputImage, imageSampler, (int2)(x - 1, y - 1));
    sum += read_imageui(inputImage, imageSampler, (int2)(x,     y - 1));
    sum += read_imageui(inputImage, imageSampler, (int2)(x + 1, y - 1));
    sum += read_imageui(inputImage, imageSampler, (int2)(x - 1, y));
    sum += read_imageui(inputImage, imageSampler, (int2)(x,     y));
    sum += read_imageui(inputImage, imageSampler, (int2)(x + 1, y));
    sum += read_imageui(inputImage, imageSampler, (int2)(x - 1, y + 1));
    sum += read_imageui(inputImage, imageSampler, (int2)(x,     y + 1));
    sum += read_imageui(inputImage, imageSampler, (int2)(x + 1, y + 1));

    write_imageui(outputImage, pos, sum / 9);
}

創作挑戰賽新人創作獎勵來咯,堅持創作打卡瓜分現金大獎

總結

以上是生活随笔為你收集整理的基于OpenCL的mean filter性能的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。