日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

欢迎访问 生活随笔!

生活随笔

当前位置: 首页 > 编程资源 > 编程问答 > 内容正文

编程问答

项目优化之:GPU编程

发布时间:2024/9/27 编程问答 21 豆豆
生活随笔 收集整理的这篇文章主要介绍了“项目优化之:GPU编程”,小编觉得挺不错的,现在分享给大家,帮大家做个参考。


1. GPU 编程依赖于显卡。

2. GPU 编程依赖于 OpenGL 或 DirectX(C++ AMP 构建在 DirectX 之上)。

3. CPU 的特点是频率比较高;GPU 的特点是寄存器非常多。

4. 如果电脑是 Windows 7,没法直接调试 GPU;Windows 8 可以直接调试。

5. 在 VS2013 中新建一个项目,命名为 GPU。

6. 调试 GPU 的方式:在 VS 中打断点 → 运行项目 → 调试 → 窗口 → GPU 线程(通过这种方式实现调试 GPU 项目)。

8. 修改项目属性:右击项目 → 属性 → 配置属性 → 常规,将调试器类型修改为“仅 GPU”。

修改 AMP 默认加速器(Amp Default Accelerator):可以选择运行时默认(Use C++ AMP runtime default)的方式,也可以使用软件加速器(WARP software accelerator)的方式。(原文此处为截图)

9. 代码:

#include <iostream>
#include <amp.h>  // C++ AMP header required for GPU programming

using namespace concurrency;


int main()

{

??? int v[11] = { 'G', 'd', 'k', 'k', 'n', 31, 'v', 'n', 'q', 'k', 'c' };

??? array_view<int> av(11, v);//array_viewGPU計(jì)算結(jié)構(gòu),av存儲(chǔ)到GPU顯存

?

??? //=表示直接操作AV

??? //(index<1> idx)操作每一個(gè)元素

??? //restrict(amp)定位GPU執(zhí)行

??? parallel_for_each(av.extent, [=](index<1> idx) restrict(amp)

??? {

??????? av[idx] += 1;//加完后變成了hello world

??? });

??? for (unsigned int i = 0; i < 11; i++)

??? {

??????? std::cout << static_cast<char>(av[i]);

??? }

??? std::cin.get();

?

??? return 0;

}

10. CPU、GPU 单值计算效率测试

案例:

#include <cstdio>     // printf, puts, getchar
#include <iostream>

#include <amp.h>
#include <Windows.h>  // umbrella header; WinBase.h is not meant to be included on its own
#include <WinBase.h>


// Number of elements in each of the two buffers below.
#define COUNT 100000

// Global float buffers of COUNT elements each; as objects with static storage
// duration they start out zero-initialised.
float nickName_GPU[COUNT];
float nickName_CPU[COUNT];


// Returns the sum 0 + 1 + ... + (num - 1) as a double; returns 0 when num <= 0.
//
// restrict(amp) limits this function to accelerator (GPU) code, so it can only
// be called from another amp-restricted function or kernel lambda.
// NOTE(review): the accumulator is a double, so the target accelerator
// presumably needs double-precision support - confirm for your hardware.
double rungpu(int num) restrict(amp)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }

    return temp;
}


// Returns the sum 0 + 1 + ... + (num - 1) as a double; returns 0 when num <= 0.
//
// restrict(cpu) limits this function to ordinary host (CPU) code; it cannot be
// called from an amp-restricted kernel. This is the CPU side of the
// single-value comparison against rungpu.
double runcpu(int num) restrict(cpu)
{
    // Sequential accumulation over a single value.
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}


// Returns the sum 0 + 1 + ... + (num - 1) as a double; returns 0 when num <= 0.
//
// restrict(amp, cpu) makes the function callable from both accelerator (GPU)
// kernels and ordinary host (CPU) code.
double runcpugpu(int num) restrict(amp, cpu)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}


//測(cè)試單值計(jì)算的運(yùn)行效率

int main()

{

??? LARGE_INTEGER freq;

??? LARGE_INTEGER strt;

??? LARGE_INTEGER ed;

??? QueryPerformanceFrequency(&freq);

??? QueryPerformanceCounter(&strt);

??? double dx[1] = { 0.0 };

??? double? db = 0.0;

?

??? concurrency::array_view<double> myview(1, dx);

??? parallel_for_each(myview.extent,

??????? [=](concurrency::index<1> idx) restrict(amp)

??? {

??????? myview[idx] += rungpu(1000000);

??? });

?

??? myview.synchronize();//顯式等待GPU計(jì)算完成并將數(shù)據(jù)打回內(nèi)存

??? printf("%f\n", dx[0]);

?

??? QueryPerformanceCounter(&ed);

??? printf("GPU耗時(shí): %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);

??? QueryPerformanceCounter(&strt);

?

??? printf("%f\n", runcpu(1000000));

?

??? QueryPerformanceCounter(&ed);

??? printf("CPU耗時(shí): %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);

??? puts("測(cè)試結(jié)束");

?

??? getchar();

??? return 0;

}

运行结果:

案例2

#include <cstdio>     // printf, puts, getchar
#include <iostream>

#include <amp.h>
#include <Windows.h>  // umbrella header; WinBase.h is not meant to be included on its own
#include <WinBase.h>


// Number of elements in each benchmark buffer; main also runs COUNT / 10
// iterations of the update formula per element.
#define COUNT 3000

// Global float buffers updated by the GPU kernel and by the CPU loop
// respectively; as objects with static storage duration they start out
// zero-initialised.
float nickName_GPU[COUNT];
float nickName_CPU[COUNT];


// Returns the sum 0 + 1 + ... + (num - 1) as a double; returns 0 when num <= 0.
//
// restrict(amp) limits this function to accelerator (GPU) code, so it can only
// be called from another amp-restricted function or kernel lambda.
// NOTE(review): the accumulator is a double, so the target accelerator
// presumably needs double-precision support - confirm for your hardware.
double rungpu(int num) restrict(amp)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }

    return temp;
}


// Returns the sum 0 + 1 + ... + (num - 1) as a double; returns 0 when num <= 0.
//
// restrict(cpu) limits this function to ordinary host (CPU) code; it cannot be
// called from an amp-restricted kernel.
double runcpu(int num) restrict(cpu)
{
    // Sequential accumulation over a single value.
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}


// Returns the sum 0 + 1 + ... + (num - 1) as a double; returns 0 when num <= 0.
//
// restrict(amp, cpu) makes the function callable from both accelerator (GPU)
// kernels and ordinary host (CPU) code.
double runcpugpu(int num) restrict(amp, cpu)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}


int main()

{

??? LARGE_INTEGER freq;

??? LARGE_INTEGER strt;

??? LARGE_INTEGER ed;

??? QueryPerformanceFrequency(&freq);

??? QueryPerformanceCounter(&strt);

?

??? concurrency::array_view<float> myView(COUNT, nickName_GPU); //將數(shù)據(jù)打入顯存?

?

??? concurrency::parallel_for_each(myView.extent, [=](concurrency::index<1> idx) restrict(amp)

??? {

??????? for (int i = 0; i < COUNT / 10; i++)

??????? {

??????????? myView[idx] = (myView[idx] + 0.1f) / 2.3f;

??????? }

??? });

?

??? myView.synchronize();//顯式等待GPU計(jì)算完成并將數(shù)據(jù)打回內(nèi)存?

?

??? QueryPerformanceCounter(&ed);

??? printf("GPU耗時(shí): %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);

??? QueryPerformanceCounter(&strt);

?

??? for (int idx = 0; idx < COUNT; idx++)

??? {

??????? for (int i = 0; i < COUNT / 10; i++)

??????? {

??????????? nickName_CPU[idx] = (nickName_CPU[idx] + 0.1f) /2.3f;

??????? }

??? }

??? QueryPerformanceCounter(&ed);

??? printf("CPU耗時(shí): %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);

?

??? for (int idx = 0; idx < COUNT; idx++)

??? {

??????? if (nickName_CPU[idx] != nickName_GPU[idx])

??????? {

??????????? puts("CPUGPU的計(jì)算結(jié)果不相符!");

??????????? getchar();

??????????? return 0;

??????? }

??? }

??? puts("測(cè)試結(jié)束");

?

??? getchar();

??? return 0;

}

运行结果:

?

?

?

总结

以上是生活随笔为你收集整理的“项目优化之:GPU编程”的全部内容,希望文章能够帮你解决所遇到的问题。

如果觉得生活随笔网站内容还不错,欢迎将生活随笔推荐给好友。