OpenCL 第6课:矩阵转置
生活随笔
收集整理的這篇文章主要介紹了
OpenCL 第6课:矩阵转置
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
上一節我們寫了個一維向量相加的程序。這節我們來看一個4×4矩陣轉置程序。
4X4矩陣我們采用二維數組進行存儲,在程序設計上,我們讓轉置過程分4次轉置完成,就是一次轉一行。注意這里的OpenCL的工作維數是二維。(當然用一維的方式也可以,只是在CL代碼中要用到循環,效率不高)
程序分兩部份:
(1)transposition.cl代碼
?| 1 2 3 4 5 6 7 8 9 10 11 | __kernel void transposition(__global int* A, ????????????????????__global int* B) { ????//獲取索引號(hào),這里是二維的,所以可以取兩個(gè) ????//否則另一個(gè)永遠(yuǎn)是0 ????int col = get_global_id(0); ????int row = get_global_id(1); ????B[col*4+row] = A[row*4+col]; } |
(2)main.cpp代碼
?| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 | #include <iostream> #include <stdio.h> #include <string.h> #include <string> #include <CL/cl.h>//包含CL的頭文件 using namespace std; //4x4數(shù)組 #define dim_x 4 #define dim_y 4 //從外部文件獲取cl內(nèi)核代碼 bool GetFileData(const char* fname,string& str) { ????FILE* fp = fopen(fname,"r"); ????if(fp==NULL) ????{ ????????printf("no found file\n"); ????????return false; ????} ????int n=0; ????while(feof(fp)==0) ????{ ????????str += fgetc(fp); ????} ????return true; } int main() { ????//先讀外部CL核心代碼,如果失敗則退出。 ????//代碼存buf_code里面 ????string code_file; ????if(false == GetFileData("transposition.cl",code_file)) ????????return 0; ????char* buf_code = new char[code_file.size()]; ????strcpy(buf_code,code_file.c_str()); ????buf_code[code_file.size()-1] = NULL; ????//聲明CL所需變量。 ????cl_device_id device; ????cl_platform_id platform_id = NULL; ????cl_context context; ????cl_command_queue cmdQueue; ????cl_mem bufferA,bufferB,bufferC; ????cl_program program; ????cl_kernel kernel = NULL; ????//我們使用的是二維向量 ????//設(shè)定向量大小(維數(shù)) ????size_t globalWorkSize[2]; ????globalWorkSize[0] = dim_x ; ????globalWorkSize[1] = dim_y; ????cl_int err; ????/* ????????定義輸入變量和輸出變量,并設(shè)定初值 ????*/ ????int buf_A[dim_x][dim_y]; ????int buf_B[dim_x][dim_y]; ????size_t datasize = sizeof(int) * dim_x * dim_y; ????int n=0; ????int m=0; ????for(n=0;n<dim_x;n++) ????{ ????????for(m=0;m<dim_y;m++) ????????{ ????????????buf_A[m][n] = m + n*dim_x; ????????} 
????} ????//step 1:初始化OpenCL ????err = clGetPlatformIDs(1,&platform_id,NULL); ????if(err!=CL_SUCCESS) ????{ ????????cout<<"clGetPlatformIDs error"<<endl; ????????return 0; ????} ????//這次我們只用CPU來(lái)進(jìn)行并行運(yùn)算,當(dāng)然你也可以該成GPU ????clGetDeviceIDs(platform_id,CL_DEVICE_TYPE_GPU,1,&device,NULL); ????//step 2:創(chuàng)建上下文 ????context = clCreateContext(NULL,1,&device,NULL,NULL,NULL); ????//step 3:創(chuàng)建命令隊(duì)列 ????cmdQueue = clCreateCommandQueue(context,device,0,NULL); ????//step 4:創(chuàng)建數(shù)據(jù)緩沖區(qū) ????bufferA = clCreateBuffer(context, ?????????????????????????????CL_MEM_READ_ONLY, ?????????????????????????????datasize,NULL,NULL); ????bufferB = clCreateBuffer(context, ?????????????????????????????CL_MEM_WRITE_ONLY, ?????????????????????????????datasize,NULL,NULL); ????//step 5:將數(shù)據(jù)上傳到緩沖區(qū) ????clEnqueueWriteBuffer(cmdQueue, ?????????????????????????bufferA,CL_FALSE, ?????????????????????????0,datasize, ?????????????????????????buf_A,0, ?????????????????????????NULL,NULL); ????//step 6:加載編譯代碼,創(chuàng)建內(nèi)核調(diào)用函數(shù) ????program = clCreateProgramWithSource(context,1, ????????????????????????????????????????(const char**)&buf_code, ????????????????????????????????????????NULL,NULL); ????clBuildProgram(program,1,&device,NULL,NULL,NULL); ????kernel = clCreateKernel(program,"transposition",NULL); ????//step 7:設(shè)置參數(shù),執(zhí)行內(nèi)核 ????clSetKernelArg(kernel,0,sizeof(cl_mem),&bufferA); ????clSetKernelArg(kernel,1,sizeof(cl_mem),&bufferB); ????//<span style="color: #ff0000;"><strong>注意這里第三個(gè)參數(shù)已經(jīng)改成2,表示二維數(shù)據(jù)。</strong></span> ????clEnqueueNDRangeKernel(cmdQueue,kernel, ???????????????????????????2,NULL, ???????????????????????????globalWorkSize, ???????????????????????????NULL,0,NULL,NULL); ????//step 8:取回計(jì)算結(jié)果 ????clEnqueueReadBuffer(cmdQueue,bufferB,CL_TRUE,0, ????????????????????????datasize,buf_B,0,NULL,NULL); ????//輸出計(jì)算結(jié)果 ????for(n=0;n<dim_x;n++) ????{ ????????for(m=0;m<dim_y;m++) 
????????{ ????????????cout<< buf_A[m][n] <<","; ????????} ????????cout<<endl; ????} ????cout<<endl<<"====transposition===="<<endl<<endl; ????for(n=0;n<dim_x;n++) ????{ ????????for(m=0;m<dim_y;m++) ????????{ ????????????cout<< buf_B[m][n] <<","; ????????} ????????cout<<endl; ????} ????//釋放所有調(diào)用和內(nèi)存 ????clReleaseKernel(kernel); ????clReleaseProgram(program); ????clReleaseCommandQueue(cmdQueue); ????clReleaseMemObject(bufferA); ????clReleaseMemObject(bufferB); ????clReleaseContext(context); ????delete buf_code; ????return 0; } |
運算結果:
?
?
?
總結
以上是生活随笔為你收集整理的OpenCL 第6课:矩阵转置的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: IOS开发基础之汽车品牌项目-14
- 下一篇: 不怕烧钱怕翻车:雷军与马化腾现场“过招”