一.核函式(運行在 GPU 上;需另存為 lhl.cl,主機端程式會在執行時讀取并編譯此檔案)
/*
 * matrix_mult: computes C = A * B for row-major float matrices.
 *
 *   Ndim  number of rows of A and of C
 *   Mdim  number of columns of B and of C
 *   Pdim  number of columns of A, which is also the number of rows of B
 *   A     Ndim x Pdim input matrix
 *   B     Pdim x Mdim input matrix
 *   C     Ndim x Mdim output matrix
 *
 * Each work-item of the 2D range produces one element of C: dimension 0
 * selects the row and dimension 1 selects the column.
 */
__kernel void matrix_mult(
    const int Ndim,
    const int Mdim,
    const int Pdim,
    __global const float* A,
    __global const float* B,
    __global float* C)
{
    const int row = get_global_id(0);
    const int col = get_global_id(1);

    /* Work-items that fall outside the matrix have nothing to compute. */
    if ((row >= Ndim) || (col >= Mdim))
        return;

    /* Dot product of row `row` of A with column `col` of B. */
    float acc = 0.0f;
    for (int k = 0; k < Pdim; ++k) {
        const float lhs = A[row * Pdim + k];
        const float rhs = B[k * Mdim + col];
        acc += lhs * rhs;
    }

    C[row * Mdim + col] = acc;
}
二. 主機端程式與輸入矩陣形式(博主列舉了三種形式的矩陣輸入,代碼中通過注釋給出:(1)自動生成(2)直接給定(3)動態輸入)
```cpp
#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <exception>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
#define NWITEMS 6
#pragma comment (lib,"OpenCL.lib")
// Reads the entire file `filename` into `s`.
//
// The file is opened in binary mode and copied byte-for-byte, so the result
// keeps any embedded NUL characters instead of stopping at the first one.
//
// Parameters:
//   filename  path of the file to read
//   s         receives the file contents; left untouched when the call fails
//
// Returns 0 on success and 1 on any failure (file cannot be opened, its size
// cannot be determined, memory cannot be allocated, or the read comes up
// short). A diagnostic is printed to stdout on failure.
int convertToString(const char* filename, std::string& s)
{
    std::ifstream f(filename, std::ios::in | std::ios::binary);
    if (!f.is_open())
    {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    // Measure the file by seeking to its end; tellg() yields -1 on failure,
    // which must be rejected before it is used as an allocation size.
    f.seekg(0, std::ios::end);
    const std::streamoff length = f.tellg();
    f.seekg(0, std::ios::beg);
    if (length < 0 || !f)
    {
        printf("Error: Failed to determine size of file %s\n", filename);
        return 1;
    }

    // Read into a local string so `s` is only modified on success.
    std::string contents;
    try
    {
        contents.resize(static_cast<std::string::size_type>(length));
    }
    catch (const std::exception&)
    {
        printf("Error: Not enough memory to read file %s\n", filename);
        return 1;
    }

    if (length > 0)
    {
        const std::streamsize wanted = static_cast<std::streamsize>(length);
        f.read(&contents[0], wanted);
        if (f.gcount() != wanted)
        {
            printf("Error: Failed to read file %s\n", filename);
            return 1;
        }
    }

    s.swap(contents);
    return 0;
}
int main()
{
cl_uint status;
cl_platform_id platform;
//創建平臺物件
status = clGetPlatformIDs(1, &platform, NULL);
cl_device_id device;
//創建 GPU 設備
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
//創建context
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
//創建命令佇列
cl_command_queue commandQueue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL);
if (commandQueue == NULL)
perror("Failed to create commandQueue for device 0.");
//建立要傳入從機的資料
/******** 創建內核和記憶體物件 ********/
//輸入定義矩陣的長寬
int Ndim, Pdim, Mdim;
cout << "請輸入A矩陣長" << endl;
cin >> Ndim;
cout << "請輸入A矩陣寬" << endl;
cin >> Pdim;
cout << "請輸入B矩陣寬" << endl;
cin >> Mdim;
int szA = Ndim * Pdim;
int szB = Pdim * Mdim;
int szC = Ndim * Mdim;
float* A;
float* B;
float* C;
A = (float*)malloc(szA * sizeof(float));
B = (float*)malloc(szB * sizeof(float));
C = (float*)malloc(szC * sizeof(float));
int i, j;
/* (1)回圈矩陣
for (i = 0; i < szA; i++)
A[i] = (float)((float)i + 1.0);
for (i = 0; i < szB; i++)
B[i] = (float)((float)i + 1.0);
(2)測驗手動輸入的矩陣
A[0] = 1;
A[1] = 6;
A[2] = 2;
A[3] = 4;
B[0] = 2;
B[1] = 4;
B[2] = 1;
B[3] = 4;
下面是第三種,手動輸入:
*/
cout << "請輸入矩陣A,按陣列形式單個輸入" << endl;
for (int i = 0; i < szA; i++)
cin >> A[i];
cout << "請輸入矩陣B,按陣列形式單個輸入" << endl;
for (int i = 0; i < szB; i++)
cin >> B[i];
//創建三個 OpenCL 記憶體物件,并把buf1 的內容通過隱式拷貝的方式
//拷貝到clbuf1, buf2 的內容通過顯示拷貝的方式拷貝到clbuf2
cl_mem memObjects[3] = { 0, 0, 0 };
memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
sizeof(float) * szA, A, NULL);
memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
sizeof(float) * szB, B, NULL);
memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(float) * szC, C, NULL);
if (memObjects[0] == NULL || memObjects[1] == NULL || memObjects[2] == NULL)
perror("Error in clCreateBuffer.\n");
const char* filename = "lhl.cl";
std::string sourceStr;
status = convertToString(filename, sourceStr);
if (status)
cout << status << " !!!!!!!!" << endl;
const char* source = sourceStr.c_str();
size_t sourceSize[] = { strlen(source) };
//創建程式物件
cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
//編譯程式物件
status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
if (status)
cout << status << " !!!!!!!!" << endl;
if (status != 0)
{
printf("clBuild failed:%d\n", status);
char tbuf[0x10000];
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf,
NULL);
printf("\n%s\n", tbuf);
//return ?1;
}
//創建 Kernel 物件
cl_kernel kernel = clCreateKernel(program, "matrix_mult", NULL);
//設定 Kernel 引數
cl_int clnum = NWITEMS;
status = clSetKernelArg(kernel, 0, sizeof(int), &Ndim);
status = clSetKernelArg(kernel, 1, sizeof(int), &Mdim);
status = clSetKernelArg(kernel, 2, sizeof(int), &Pdim);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &memObjects[0]);
status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &memObjects[1]);
status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &memObjects[2]);
if (status)
cout << "引數設定錯誤" << endl;
//執行 kernel
size_t global[2];
cl_event prof_event;
cl_ulong ev_start_time = (cl_ulong)0;
cl_ulong ev_end_time = (cl_ulong)0;
double rum_time;
global[0] = (size_t)Ndim;
global[1] = (size_t)Mdim;
status = clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL,
global, NULL, 0, NULL, &prof_event);
if (status)
cout << "執行內核時錯誤" << endl;
clFinish(commandQueue);
//讀取時間
status = clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_QUEUED,
sizeof(cl_ulong), &ev_start_time, NULL);
status = clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END,
sizeof(cl_ulong), &ev_end_time, NULL);
if (status)
perror("讀取時間的時候發生錯誤\n");
rum_time = (double)(ev_end_time - ev_start_time);
cout << "執行時間為:" << rum_time << endl;
//資料拷回 host 記憶體
status = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0,
sizeof(float) * szC, C, 0, NULL, NULL);
if (status)
perror("讀回資料的時候發生錯誤\n");
//結果顯示
printf("\nArray A:\n");
for (i = 0; i < Ndim; i++) {
for (j = 0; j < Pdim; j++)
printf("%.1f\t", A[i * Pdim + j]);
printf("\n");
}
printf("\nArray B:\n");
for (i = 0; i < Pdim; i++) {
for (j = 0; j < Mdim; j++)
printf("%.1f\t", B[i * Mdim + j]);
printf("\n");
}
printf("\nArray C:\n");
for (i = 0; i < Ndim; i++) {
for (j = 0; j < Mdim; j++)
printf("%.1f\t", C[i * Mdim + j]);
printf("\n");
}
cout << endl;
if (A)
free(A);
if (B)
free(B);
if (C)
free(C);
//洗掉 OpenCL 資源物件
clReleaseMemObject(memObjects[2]);
clReleaseMemObject(memObjects[1]);
clReleaseMemObject(memObjects[0]);
clReleaseProgram(program);
clReleaseCommandQueue(commandQueue);
clReleaseContext(context);
system("pause");
return 0;
}
```
轉載請註明出處,本文鏈接:https://www.uj5u.com/qukuanlian/230610.html
標籤:區塊鏈
上一篇:簡單理解量子計算
下一篇:【教程】IPFS-FIL挖礦指南
