目前我用 Intel i3 CPU + AMD Radeon RX560 顯卡。
我想在程式中使用 overlap(即讓多個 queue 並行執行),但發現有問題。下面的測試程式建立 3 個 queue,並行地呼叫 3 次非同步寫入函式,分別將三塊資料(YUV 三個分量,各約 2MB)寫入緩衝區。程式以迴圈重複寫入 3 次。
為什麼 clEnqueueWriteBuffer 會有幾十 ms 的延遲?這個延遲時間太長了。
程式如下:
/*
 * Upload test: copies the Y, U and V planes of one frame into a single
 * OpenCL buffer, one plane per command queue, using non-blocking writes so
 * the three transfers may overlap. The upload is repeated NUM_ITERATIONS
 * times.
 *
 * Returns EXIT_SUCCESS when every step succeeded, EXIT_FAILURE otherwise.
 * All OpenCL objects and host buffers are released on every exit path.
 *
 * NOTE(review): assumes WIDTH and HEIGHT (defined elsewhere) are the frame
 * dimensions in pixels and that each plane holds WIDTH * HEIGHT bytes, which
 * is how the host planes were sized originally - confirm against
 * readFrameFromYUVFile.
 */
int main() //main_v2
{
    enum { NUM_PLANES = 3, NUM_ITERATIONS = 3 };

    /* Everything is declared up front so the cleanup label can be reached
     * from any point without jumping over an initialization. */
    cl_platform_id platform = NULL;
    cl_device_id device = NULL; //TODO: extend for multi devices on platform
    cl_context ocl_ctx = NULL;
    cl_command_queue yuvQueue[NUM_PLANES] = { NULL, NULL, NULL };
    cl_mem pCurFrameObj = NULL;
    unsigned char *planes[NUM_PLANES] = { NULL, NULL, NULL };
    const size_t planeSize = (size_t)WIDTH * (size_t)HEIGHT;
    int err = 0;
    int rc = EXIT_FAILURE;
    int i;
    int iter;

    /* Get the first available platform. */
    err = clGetPlatformIDs(1, &platform, NULL);
    if (err != CL_SUCCESS) {
        printf("can't get OpenCL platform %d\n", err);
        goto cleanup;
    }

    /* Get a device: prefer a GPU, fall back to a CPU device. */
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    if (err != CL_SUCCESS) {
        printf("can't get gpu device, try cpu...\n");
        err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
        if (err != CL_SUCCESS) {
            printf("can't get cpu device %d\n", err);
            goto cleanup;
        }
    }

    /* Create the OpenCL context. */
    ocl_ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    if (ocl_ctx == NULL) {
        printf("create OpenCL context fail\n");
        goto cleanup;
    }

    /* One in-order queue per plane, created with default properties
     * (profiling is not enabled). */
    for (i = 0; i < NUM_PLANES; i++) {
        yuvQueue[i] = clCreateCommandQueueWithProperties(ocl_ctx, device, NULL, &err);
        if (yuvQueue[i] == NULL) {
            printf("create command queue fail %d\n", err);
            goto cleanup;
        }
    }

    /* Device buffer holding the three planes back to back. No host pointer
     * is supplied: the contents are filled by the writes below, so there is
     * nothing meaningful to copy at creation time.
     * NOTE(review): CL_MEM_WRITE_ONLY is kept from the original; if kernels
     * are meant to read this frame, CL_MEM_READ_ONLY or CL_MEM_READ_WRITE is
     * probably intended - confirm. */
    pCurFrameObj = clCreateBuffer(ocl_ctx, CL_MEM_WRITE_ONLY,
                                  (size_t)NUM_PLANES * planeSize, NULL, &err);
    if (pCurFrameObj == NULL) {
        printf("create buffer fail %d\n", err);
        goto cleanup;
    }

    /* Host staging planes are allocated once and reused by every iteration.
     * The casts keep the code valid if the file is built as C++. */
    for (i = 0; i < NUM_PLANES; i++) {
        planes[i] = (unsigned char *)malloc(planeSize);
        if (planes[i] == NULL) {
            printf("host plane allocation fail\n");
            goto cleanup;
        }
    }

    for (iter = 0; iter < NUM_ITERATIONS; iter++) {
        /* NOTE(review): the result of readFrameFromYUVFile is not checked
         * because its return contract is not visible here. */
        readFrameFromYUVFile("z:\\test_files\\test_mv.yuv",
                             planes[0], planes[1], planes[2], 0, WIDTH, HEIGHT);

        /* Non-blocking writes: plane i goes to offset i * planeSize via its
         * own queue so the transfers can overlap. */
        for (i = 0; i < NUM_PLANES; i++) {
            err = clEnqueueWriteBuffer(yuvQueue[i], pCurFrameObj, CL_FALSE,
                                       (size_t)i * planeSize, planeSize,
                                       (void *)planes[i], 0, NULL, NULL);
            if (err != CL_SUCCESS) {
                printf("enqueue write buffer fail %d\n", err);
                goto cleanup;
            }
        }

        /* A non-blocking write may still be reading the host memory after
         * the enqueue call returns. Wait for all three queues before the
         * next iteration overwrites the staging planes. */
        for (i = 0; i < NUM_PLANES; i++) {
            err = clFinish(yuvQueue[i]);
            if (err != CL_SUCCESS) {
                printf("finish command queue fail %d\n", err);
                goto cleanup;
            }
        }
    }

    rc = EXIT_SUCCESS;

cleanup:
    /* Drain every queue first so no transfer is in flight when the buffer,
     * the queues or the host memory are released. */
    for (i = 0; i < NUM_PLANES; i++) {
        if (yuvQueue[i] != NULL) {
            clFinish(yuvQueue[i]);
        }
    }
    if (pCurFrameObj != NULL) {
        clReleaseMemObject(pCurFrameObj);
    }
    for (i = 0; i < NUM_PLANES; i++) {
        if (yuvQueue[i] != NULL) {
            clReleaseCommandQueue(yuvQueue[i]);
        }
    }
    if (ocl_ctx != NULL) {
        clReleaseContext(ocl_ctx);
    }
    for (i = 0; i < NUM_PLANES; i++) {
        free(planes[i]);
    }

    if (rc == EXIT_SUCCESS) {
        printf("Press anykey to quit...");
        getchar();
    }
    return rc;
}
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/120482.html
標籤:OpenCL和異構編程
上一篇:SpringBoot與JWT整合
下一篇:frist
