日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 >内容正文

编程问答

CUDA编程--并行矩阵向量乘法【80+行代码】

發布時間:2025/4/16 编程问答 29 豆豆
生活随笔 收集整理的這篇文章主要介紹了 CUDA编程--并行矩阵向量乘法【80+行代码】 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

簡述

矩陣向量乘法。

  • 讀取文件data.txt
  • 并將計算結果輸出到output.txt文件中
  • 用typedef方便的修改數據類型(要是寫成模板也是可以的)

代碼

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>

#include <climits>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <vector>

// Element type used for the matrix and both vectors. Change it here to
// switch the whole program to another arithmetic type.
typedef double DATA;

// Threads per block for the 1-D launch; a multiple of the 32-lane warp size.
static const unsigned int kThreadsPerBlock = 256;

// Kernel: computes c = a * b, where `a` is an N x N matrix stored row-major
// and `b`, `c` are vectors of length N.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per output row. The grid
// must supply at least N threads; surplus threads in the last block do
// nothing. No shared memory is used.
//
// Note: thread `tx` reads a[tx * N + k], so neighbouring threads touch
// addresses N elements apart at each step of the loop.
__global__ void MatrixMultiply(DATA *a, DATA *b, DATA *c, int N) {
    int tx = threadIdx.x + blockIdx.x * blockDim.x;
    if (tx < N) {
        // Form the row offset in size_t: tx * N overflows int once N > 46340.
        const DATA *row = a + static_cast<size_t>(tx) * static_cast<size_t>(N);
        DATA sum = 0;
        for (int k = 0; k < N; ++k) {
            sum += row[k] * b[k];
        }
        c[tx] = sum;
    }
}

cudaError_t matrixMultiplyWithCuda(DATA *a, DATA *b, DATA *c, size_t size);

// Reads N, an N x N matrix and an N-vector from "data.txt", multiplies them
// on the GPU and writes the resulting vector to "output.txt", one value per
// line in fixed notation.
//
// Returns 0 on success and 1 if the input is missing/malformed, the GPU
// computation fails, or the output file cannot be written.
int main() {
    std::ifstream in("data.txt");
    int N;
    in >> N;
    if (in.fail() || N < 0) {
        // Stop here: continuing would size the buffers from a garbage N.
        printf("Something wrong\n");
        return 1;
    }
    printf("Success read\n");

    // Host buffers. std::vector releases them on every exit path.
    const size_t count = static_cast<size_t>(N);
    std::vector<DATA> a(count * count);
    std::vector<DATA> b(count);
    std::vector<DATA> c(count);

    // The matrix is stored row-major, so reading it element by element in
    // file order fills a[i * N + j] exactly as a nested i/j loop would.
    for (size_t i = 0; i < a.size(); ++i) in >> a[i];
    for (size_t i = 0; i < b.size(); ++i) in >> b[i];
    if (in.fail()) {
        fprintf(stderr, "data.txt does not contain %d x %d matrix and %d vector values\n",
                N, N, N);
        return 1;
    }

    cudaError_t cudaStatus = matrixMultiplyWithCuda(a.data(), b.data(), c.data(), count);
    if (cudaStatus != cudaSuccess) {
        // Do not emit output.txt from an uninitialised result buffer.
        fprintf(stderr, "GPU computation failed: %s\n", cudaGetErrorString(cudaStatus));
        cudaDeviceReset();
        return 1;
    }

    std::ofstream out("output.txt");
    if (!out) {
        fprintf(stderr, "Cannot open output.txt for writing\n");
        cudaDeviceReset();
        return 1;
    }
    out << std::setiosflags(std::ios::fixed);
    for (size_t i = 0; i < c.size(); ++i) {
        out << c[i] << " " << '\n';
    }
    out.flush();
    const bool written = !out.fail();

    // cudaDeviceReset() is the replacement for the deprecated cudaThreadExit().
    cudaStatus = cudaDeviceReset();

    return (written && cudaStatus == cudaSuccess) ? 0 : 1;
}

// Host wrapper: copies the N x N row-major matrix `a` and the N-vector `b`
// to the device, runs MatrixMultiply, and copies the N-vector result back
// into `c`.
//
// Parameters:
//   a  host pointer to N * N elements (row-major matrix)
//   b  host pointer to N elements (input vector)
//   c  host pointer to N elements (receives the result)
//   N  dimension of the problem
//
// Returns cudaSuccess on success (including the trivial N == 0 case),
// cudaErrorInvalidValue for null pointers or an N that does not fit the
// kernel's int parameter, cudaErrorMemoryAllocation if the byte counts would
// overflow size_t, or the first failing CUDA runtime error otherwise.
// All device allocations are released before returning.
cudaError_t matrixMultiplyWithCuda(DATA *a, DATA *b, DATA *c, size_t N) {
    if (N == 0) {
        return cudaSuccess;  // nothing to compute; a zero-block launch is invalid
    }
    if (a == 0 || b == 0 || c == 0) {
        return cudaErrorInvalidValue;
    }
    if (N > static_cast<size_t>(INT_MAX)) {
        return cudaErrorInvalidValue;  // the kernel takes N as int
    }
    if (N > static_cast<size_t>(-1) / sizeof(DATA) / N) {
        return cudaErrorMemoryAllocation;  // N * N * sizeof(DATA) would overflow
    }

    const size_t vectorBytes = N * sizeof(DATA);
    const size_t matrixBytes = N * vectorBytes;

    DATA *dev_a = 0;
    DATA *dev_b = 0;
    DATA *dev_c = 0;

    // Each step runs only if everything before it succeeded, so cudaStatus
    // always holds the FIRST failure rather than the result of the last call.
    cudaError_t cudaStatus = cudaMalloc((void **)&dev_a, matrixBytes);
    if (cudaStatus == cudaSuccess) {
        cudaStatus = cudaMalloc((void **)&dev_b, vectorBytes);
    }
    if (cudaStatus == cudaSuccess) {
        cudaStatus = cudaMalloc((void **)&dev_c, vectorBytes);
    }
    if (cudaStatus == cudaSuccess) {
        cudaStatus = cudaMemcpy(dev_a, a, matrixBytes, cudaMemcpyHostToDevice);
    }
    if (cudaStatus == cudaSuccess) {
        cudaStatus = cudaMemcpy(dev_b, b, vectorBytes, cudaMemcpyHostToDevice);
    }

    if (cudaStatus != cudaSuccess) {
        printf("Something wrong\n");
        fprintf(stderr, "CUDA setup failed: %s\n", cudaGetErrorString(cudaStatus));
    } else {
        // Kernel invocation: ceil-div so the grid covers all N rows.
        const unsigned int blockCount =
            static_cast<unsigned int>((N + kThreadsPerBlock - 1) / kThreadsPerBlock);
        dim3 threadPerBlock(kThreadsPerBlock, 1, 1);
        dim3 numBlocks(blockCount, 1, 1);
        MatrixMultiply<<<numBlocks, threadPerBlock>>>(dev_a, dev_b, dev_c, static_cast<int>(N));

        // A launch returns no status: configuration errors surface through
        // cudaGetLastError(), in-kernel faults at the next synchronisation.
        cudaStatus = cudaGetLastError();
        if (cudaStatus == cudaSuccess) {
            cudaStatus = cudaDeviceSynchronize();
        }

        if (cudaStatus != cudaSuccess) {
            printf("Calculate wrong\n");
            fprintf(stderr, "CUDA kernel failed: %s\n", cudaGetErrorString(cudaStatus));
        } else {
            cudaStatus = cudaMemcpy(c, dev_c, vectorBytes, cudaMemcpyDeviceToHost);
            if (cudaStatus != cudaSuccess) {
                fprintf(stderr, "CUDA copy-back failed: %s\n", cudaGetErrorString(cudaStatus));
            }
        }
    }

    // cudaFree(0) is a no-op, so this is safe whichever step failed.
    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);
    return cudaStatus;
}

總結

以上是生活随笔為你收集整理的CUDA编程--并行矩阵向量乘法【80+行代码】的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。