CUDA拷贝二维数组到GPU内存中
CUDA 复杂问题 + 细节问题 解答 见 CUDA复杂问题 + 细节问题 解答
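网上没找到如何拷贝二维数组到GPU的例程,所以我就自己写一个。其实原理也很简单:二维数组在GPU上表现为一个"指针数组"(float **),其中每个元素指向一行数据。我们先在主机端用一个中间指针数组(midData)保存每一行在GPU上分配到的地址,再把这个指针数组整体拷贝到GPU上的二维指针(dev_data)中。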
先说一下我们的目标:导入到GPU一个二维数组:dev_data[arrayNum][mwidth * mheight],该数组内元素的值是data[i][j] = i + 0.001*j。我们首先输出所有的data[i][7]。
然后我们再在GPU里把数组的data[i][7]全都改为82,然后再输出所有的data[i][7]。
最后应该得到的结果:第一行依次输出 0.007、1.007、……、99.007(即 i + 0.007,i = 0..99),第二行输出 100 个 82。
先说一下基本流程:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cuda.h"
#include <cstdlib>
#include <iostream>
using std::cout;
using std::endl;
// Number of rows in the 2D array (one row per GPU thread in the kernels below).
#define arrayNum 100
// Each row holds mwidth * mheight floats.
#define mwidth 10
#define mheight 10
// Host-side 2D array: data[i] is a host buffer of mwidth * mheight floats.
float **data;
// Host-side table of DEVICE row pointers: midData[i] is the address returned by
// cudaMalloc for row i. It lives on the host so the addresses can be copied to dev_data.
float ** midData;
// Device-side table of device row pointers; this is what the kernels index as dev_data[i][j].
float **dev_data;
// Forward declarations: DataInit and the kernels are defined after main.
void DataInit(void);
__global__ void cudaTest(float *c, float **dev_data);
__global__ void cudaChange(float *c, float **dev_data);

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << endl;
    return false;
}

// Gathers dev_data[i][7] for every row into dev_c on the GPU, copies the
// result into the host buffer c and prints it on one line.
// Returns false if the launch or the copy failed.
static bool printColumnSeven(dim3 blocks, dim3 threads, float *dev_c, float *c)
{
    cudaTest<<<blocks, threads>>>(dev_c, dev_data);
    // A kernel launch returns nothing; launch errors must be queried explicitly.
    if (!cudaSucceeded(cudaGetLastError(), "cudaTest launch")) {
        return false;
    }
    // Synchronous copy: it also reports errors raised while the kernel was running.
    if (!cudaSucceeded(cudaMemcpy(c, dev_c, arrayNum * sizeof(float), cudaMemcpyDeviceToHost),
                       "cudaMemcpy(c <- dev_c)")) {
        return false;
    }
    for (int i = 0; i < arrayNum; i++) {
        cout << c[i] << " ";
    }
    cout << endl;
    cout << endl;
    return true;
}

// Demo entry point: uploads the 2D array, prints column 7 of every row,
// overwrites that column on the GPU and prints it again.
// Returns 0 on success and 1 if any allocation, copy or launch failed.
int main()
{
    // Buffers used to fetch values from GPU memory for printing.
    float *dev_c = 0;
    float *c = (float*)malloc(arrayNum * sizeof(float));
    if (c == 0) {
        std::cerr << "Host allocation of the output buffer failed" << endl;
        return 1;
    }
    if (!cudaSucceeded(cudaMalloc((void**)&dev_c, arrayNum * sizeof(float)), "cudaMalloc(dev_c)")) {
        free(c);
        return 1;
    }

    // Initialise the data on the host and upload it to the GPU.
    DataInit();

    // 2x2 blocks of 5x5 threads = 100 threads, i.e. exactly one thread per row (arrayNum).
    dim3 blocks(2, 2);
    dim3 threads(5, 5);

    // Print the values currently stored on the GPU.
    bool ok = printColumnSeven(blocks, threads, dev_c, c);

    // Modify the data in GPU memory.
    if (ok) {
        cudaChange<<<blocks, threads>>>(dev_c, dev_data);
        ok = cudaSucceeded(cudaGetLastError(), "cudaChange launch");
    }

    // Print the values again after the change.
    if (ok) {
        ok = printColumnSeven(blocks, threads, dev_c, c);
    }

    // Release everything DataInit allocated (one device buffer and one host buffer
    // per row, plus the three pointer tables), then the local buffers.
    // Errors during teardown are deliberately ignored.
    if (midData != 0) {
        for (int i = 0; i < arrayNum; i++) {
            cudaFree(midData[i]);
        }
        free(midData);
        midData = 0;
    }
    if (data != 0) {
        for (int i = 0; i < arrayNum; i++) {
            free(data[i]);
        }
        free(data);
        data = 0;
    }
    cudaFree(dev_data);
    dev_data = 0;
    cudaFree(dev_c);
    free(c);

    system("pause");
    return ok ? 0 : 1;
}
核函数:
// Copies element 7 of every row of dev_data into the flat output array c.
//
// Launch layout: 2D grid of 2D blocks; each thread handles the row whose index is
// its flattened global position (x + y * total threads in x). Threads whose index
// falls outside [0, arrayNum) do nothing, so the launch may be larger than arrayNum.
//
// c        - device buffer with room for at least arrayNum floats
// dev_data - device table of arrayNum device row pointers; each row must hold
//            at least 8 floats
__global__ void cudaTest(float *c, float **dev_data)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;
    // Guard against launches that supply more threads than there are rows.
    if (offset < arrayNum) {
        c[offset] = dev_data[offset][7];
    }
}
// Overwrites element 7 of every row of dev_data with 82.
//
// Launch layout: 2D grid of 2D blocks; each thread handles the row whose index is
// its flattened global position (x + y * total threads in x). Threads whose index
// falls outside [0, arrayNum) do nothing, so the launch may be larger than arrayNum.
//
// c        - unused; kept so the signature matches cudaTest
// dev_data - device table of arrayNum device row pointers; each row must hold
//            at least 8 floats
__global__ void cudaChange(float *c, float **dev_data)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;
    // Guard against launches that supply more threads than there are rows.
    if (offset < arrayNum) {
        dev_data[offset][7] = 82.0f;
    }
}
重点内容是DataInit()函数,负责数据的初始化:
// (Same three globals as declared at the top of the program, repeated for reference.)
// Host-side 2D array: data[i] is one host row.
float **data;
// Host-side table holding the device address of each row.
float ** midData;
// Device-side table of device row pointers, indexed by the kernels.
float **dev_data;
首先把二维数组的三个指针表分配出来:
// Host table of arrayNum host row pointers.
data = (float**)malloc(arrayNum * sizeof(float * ));
// Host table of arrayNum pointers; DataInit fills it with device row addresses.
midData = (float**)malloc(arrayNum * sizeof(float *));
// Device table of arrayNum pointers; receives a copy of midData at the end of DataInit.
cudaMalloc((void***)&(dev_data), arrayNum * sizeof(float * ));
midData的作用是:
即,在CPU上保存每一行GPU内存的地址(midData[i] 是第 i 行在GPU上的首地址),最后把这张地址表整体拷贝到GPU上的二维指针 dev_data 中:
// (Same three globals as declared at the top of the program, repeated for reference.)
// Host-side 2D array: data[i] is one host row.
float **data;
// Host-side table holding the device address of each row.
float ** midData;
// Device-side table of device row pointers, indexed by the kernels.
float **dev_data;
// Prints a diagnostic and terminates the program when a CUDA runtime call failed.
static void dieOnCudaError(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        std::cerr << "CUDA error in " << what << ": "
                  << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Prints a diagnostic and terminates the program when a host allocation returned null.
static void dieOnHostAllocFailure(const void *ptr, const char *what)
{
    if (ptr == 0) {
        std::cerr << "Host allocation failed: " << what << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Builds the 2D array on the host and mirrors it on the GPU.
//
// On return:
//   data[i][j]  == i + 0.001 * j                    (host copy)
//   midData[i]  == device address of row i          (host table of device pointers)
//   dev_data    == device copy of the midData table (what kernels index)
//
// Any failed allocation or copy is reported on stderr and terminates the program,
// because a kernel that later dereferences dev_data would otherwise fault on an
// invalid pointer.
void DataInit(void) {
    const size_t rowBytes = mwidth * mheight * sizeof(float);
    const size_t tableBytes = arrayNum * sizeof(float *);

    data = (float**)malloc(tableBytes);
    dieOnHostAllocFailure(data, "data");
    midData = (float**)malloc(tableBytes);
    dieOnHostAllocFailure(midData, "midData");
    dieOnCudaError(cudaMalloc((void**)&dev_data, tableBytes), "cudaMalloc(dev_data)");

    for (int i = 0; i < arrayNum; i++) {
        data[i] = (float *)malloc(rowBytes);
        dieOnHostAllocFailure(data[i], "data[i]");
        // The device address is stored in a HOST table so it can be uploaded below.
        dieOnCudaError(cudaMalloc((void**)&midData[i], rowBytes), "cudaMalloc(midData[i])");

        for (int j = 0; j < mwidth * mheight; j++) {
            // Evaluated in double and narrowed once, exactly as the original expression did.
            data[i][j] = (float)(i + 0.001 * j);
        }
        dieOnCudaError(cudaMemcpy(midData[i], data[i], rowBytes, cudaMemcpyHostToDevice),
                       "cudaMemcpy(midData[i] <- data[i])");
    }

    // Upload the table of row addresses so device code can follow dev_data[i].
    dieOnCudaError(cudaMemcpy(dev_data, midData, tableBytes, cudaMemcpyHostToDevice),
                   "cudaMemcpy(dev_data <- midData)");
}
这样就能把二维数组拷贝到GPU内存里使用了。