项目优化之:GPU编程
1. GPU編程依賴於顯卡。
2. GPU編程依賴於OpenGL和DirectX。
3. CPU的特點是:頻率比較快;GPU的特點是:寄存器非常非常的多。
4. 如果電腦是Windows 7,沒法直接調試GPU;Windows 8可以直接調試。
5. 用VS2013新建一個項目,命名為:GPU。
6. 調試GPU的方式是VS中的:打斷點 → 運行項目 → 調試 → 窗口 → GPU線程(通過這種方式實現調試GPU項目)。
8. 修改項目屬性:右擊項目 → 屬性 → 配置屬性 → 常規,修改調試器類型為「僅GPU」。
修改AMP默認加速器時,可以選擇運行時默認(Use C++ AMP runtime default)的方式,也可以使用軟件加速器(WARP software accelerator)的方式(見截圖)。
9.代碼:
#include <iostream>
#include <amp.h>  // C++ AMP header required for GPU programming
using namespace concurrency;

int main()
{
??? int v[11] = { 'G', 'd', 'k', 'k', 'n', 31, 'v', 'n', 'q', 'k', 'c' };
??? array_view<int> av(11, v);//array_view是GPU計(jì)算結(jié)構(gòu),av存儲(chǔ)到GPU顯存
?
??? //=表示直接操作AV
??? //(index<1> idx)操作每一個(gè)元素
??? //restrict(amp)定位GPU執(zhí)行
??? parallel_for_each(av.extent, [=](index<1> idx) restrict(amp)
??? {
??????? av[idx] += 1;//加完后變成了hello world
??? });
??? for (unsigned int i = 0; i < 11; i++)
??? {
??????? std::cout << static_cast<char>(av[i]);
??? }
??? std::cin.get();
?
??? return 0;
}
10. CPU、GPU單值計算效率測試
案例:
#include <cstdio>     // printf, puts, getchar
#include <iostream>
#include <amp.h>      // C++ AMP
#include <Windows.h>  // must come before WinBase.h, which is not usable on its own
#include <WinBase.h>

// Element count of the two global buffers below.
#define COUNT 100000

// Global float buffers; zero-initialized because they have static storage.
float nickName_GPU[COUNT];
float nickName_CPU[COUNT];

// Sums the integers 0 .. num-1 into a double and returns the total.
//
// GPUs have the advantage for parallel workloads. restrict(amp) limits this
// function to accelerator (GPU) code, so it can only be called from other
// restrict(amp) code such as a parallel_for_each kernel.
//
// NOTE(review): the accumulator is a double, which needs an accelerator with
// double-precision support -- confirm on the target hardware.
//
// num: number of terms to add; values <= 0 yield 0.
double rungpu(int num) restrict(amp)
{
    double sum = 0;
    for (int i = 0; i < num; ++i)
    {
        sum += i;
    }

    return sum;
}

// Sums the integers 0 .. num-1 into a double and returns the total.
//
// CPUs have the advantage for single-value (single-point) computation.
// restrict(cpu) limits this function to ordinary host code: it runs on the
// CPU only and cannot be called from a restrict(amp) kernel.
//
// num: number of terms to add; values <= 0 yield 0.
double runcpu(int num) restrict(cpu)
{
    // The whole computation operates on one scalar accumulator.
    double sum = 0;
    for (int i = 0; i < num; ++i)
    {
        sum += i;
    }
    return sum;
}

// Sums the integers 0 .. num-1 into a double and returns the total.
//
// restrict(amp, cpu) allows the function to be used from both accelerator
// (GPU) code and ordinary host (CPU) code.
//
// num: number of terms to add; values <= 0 yield 0.
double runcpugpu(int num) restrict(amp, cpu)
{
    double sum = 0;
    for (int i = 0; i < num; ++i)
    {
        sum += i;
    }
    return sum;
}

//測(cè)試單值計(jì)算的運(yùn)行效率
int main()
{
??? LARGE_INTEGER freq;
??? LARGE_INTEGER strt;
??? LARGE_INTEGER ed;
??? QueryPerformanceFrequency(&freq);
??? QueryPerformanceCounter(&strt);
??? double dx[1] = { 0.0 };
??? double? db = 0.0;
?
??? concurrency::array_view<double> myview(1, dx);
??? parallel_for_each(myview.extent,
??????? [=](concurrency::index<1> idx) restrict(amp)
??? {
??????? myview[idx] += rungpu(1000000);
??? });
?
??? myview.synchronize();//顯式等待GPU計(jì)算完成并將數(shù)據(jù)打回內(nèi)存
??? printf("%f\n", dx[0]);
?
??? QueryPerformanceCounter(&ed);
??? printf("GPU耗時(shí): %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);
??? QueryPerformanceCounter(&strt);
?
??? printf("%f\n", runcpu(1000000));
?
??? QueryPerformanceCounter(&ed);
??? printf("CPU耗時(shí): %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);
??? puts("測(cè)試結(jié)束");
?
??? getchar();
??? return 0;
}
運行結果:
案例2:
#include <cstdio>     // printf, puts, getchar
#include <iostream>
#include <amp.h>      // C++ AMP
#include <Windows.h>  // must come before WinBase.h, which is not usable on its own
#include <WinBase.h>

// Element count of the two global buffers below.
#define COUNT 3000

// Global float buffers; zero-initialized because they have static storage.
float nickName_GPU[COUNT];
float nickName_CPU[COUNT];

// Sums the integers 0 .. num-1 into a double and returns the total.
//
// GPUs have the advantage for parallel workloads. restrict(amp) limits this
// function to accelerator (GPU) code, so it can only be called from other
// restrict(amp) code such as a parallel_for_each kernel.
//
// NOTE(review): the accumulator is a double, which needs an accelerator with
// double-precision support -- confirm on the target hardware.
//
// num: number of terms to add; values <= 0 yield 0.
double rungpu(int num) restrict(amp)
{
    double sum = 0;
    for (int i = 0; i < num; ++i)
    {
        sum += i;
    }

    return sum;
}

// Sums the integers 0 .. num-1 into a double and returns the total.
//
// CPUs have the advantage for single-value (single-point) computation.
// restrict(cpu) limits this function to ordinary host code: it runs on the
// CPU only and cannot be called from a restrict(amp) kernel.
//
// num: number of terms to add; values <= 0 yield 0.
double runcpu(int num) restrict(cpu)
{
    // The whole computation operates on one scalar accumulator.
    double sum = 0;
    for (int i = 0; i < num; ++i)
    {
        sum += i;
    }
    return sum;
}

// Sums the integers 0 .. num-1 into a double and returns the total.
//
// restrict(amp, cpu) allows the function to be used from both accelerator
// (GPU) code and ordinary host (CPU) code.
//
// num: number of terms to add; values <= 0 yield 0.
double runcpugpu(int num) restrict(amp, cpu)
{
    double sum = 0;
    for (int i = 0; i < num; ++i)
    {
        sum += i;
    }
    return sum;
}

int main()
{
??? LARGE_INTEGER freq;
??? LARGE_INTEGER strt;
??? LARGE_INTEGER ed;
??? QueryPerformanceFrequency(&freq);
??? QueryPerformanceCounter(&strt);
?
??? concurrency::array_view<float> myView(COUNT, nickName_GPU); //將數(shù)據(jù)打入顯存?
?
??? concurrency::parallel_for_each(myView.extent, [=](concurrency::index<1> idx) restrict(amp)
??? {
??????? for (int i = 0; i < COUNT / 10; i++)
??????? {
??????????? myView[idx] = (myView[idx] + 0.1f) / 2.3f;
??????? }
??? });
?
??? myView.synchronize();//顯式等待GPU計(jì)算完成并將數(shù)據(jù)打回內(nèi)存?
?
??? QueryPerformanceCounter(&ed);
??? printf("GPU耗時(shí): %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);
??? QueryPerformanceCounter(&strt);
?
??? for (int idx = 0; idx < COUNT; idx++)
??? {
??????? for (int i = 0; i < COUNT / 10; i++)
??????? {
??????????? nickName_CPU[idx] = (nickName_CPU[idx] + 0.1f) /2.3f;
??????? }
??? }
??? QueryPerformanceCounter(&ed);
??? printf("CPU耗時(shí): %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);
?
??? for (int idx = 0; idx < COUNT; idx++)
??? {
??????? if (nickName_CPU[idx] != nickName_GPU[idx])
??????? {
??????????? puts("CPU和GPU的計(jì)算結(jié)果不相符!");
??????????? getchar();
??????????? return 0;
??????? }
??? }
??? puts("測(cè)試結(jié)束");
?
??? getchar();
??? return 0;
}
運行結果:
?
?
?
總結
以上是生活随笔為你收集整理的《项目优化之:GPU编程》的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: STL之multiset中equal_r
- 下一篇: 征信报告去哪里打