詳解 CUDA By Example 中的 Julia Set 繪制GPU優化-有解無憂

筆者的測試環境為 VS2019。

基本介紹

原書作者引入 Julia Set，意在示範如何使用 GPU 加速圖形的繪製。這里繪製的（填充）Julia Set 是指：以該點本身作為初值 \(Z_0\)，按下式反覆迭代后不發散（保持有界）的複數所構成的集合，其中 \(C\) 為固定的複數常數：

\[Z_{n+1}=Z_{n}^2+C \]

環境配置

跑這個例子的主要困難應該在于配置環境，這個程式依賴于openGL中的glut庫，由于VS2019的整個軟體架構發生了很大變化，一些鏈接庫和頭檔案的位置都發生了改變，因此一些文章中的配置方法失效了，

首先我們需要獲取glut庫的頭檔案以及元件，

點擊這里cg-toolkit獲取，安裝成功之后，找到C:\Program Files (x86)\NVIDIA Corporation\Cg，注意勾選安裝選項的x64相關應用，

將其中的lib檔案夾中的glut32.lib復制到C:\Program Files (x86)\Windows Kits\10\Lib\10.0.18362.0\ucrt\x86

將其中的lib.x64檔案夾中的glut32.lib復制到C:\Program Files (x86)\Windows Kits\10\Lib\10.0.18362.0\ucrt\x64并且重命名其為glut64.lib

筆者運行的是64位系統，就將bin.x64中的glut32.dll復制到C:\Windows\System32下

在這里下載頭檔案，下載完成之后，將頭檔案拷貝到C:\Program Files (x86)\Windows Kits\10\Include\10.0.18362.0\ucrt，并建立檔案夾GL把它們包括起來，

提示，核心是找到C:\Program Files (x86)\Windows Kits\10，不要在Microsoft Visual Studio檔案夾里浪費時間，

后面的10.0.18362.0根據版本不同可能不一致，具體問題具體分析

這個代碼還需要一些別的頭檔案，如gl_helper.h, book.h, cpu_bitmap.h 等在這里下載后復制到C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include

主要代碼

CPU Julia Set

RGBA模式中，每一個像素會保存以下資料：R值（紅色分量）、G值（綠色分量）、B值（藍色分量）和A值（alpha分量）。其中紅、綠、藍三種顏色相組合，就可以得到我們所需要的各種顏色；而alpha不直接影響顏色，它的含義是透明度。

下面是純粹CPU中的代碼，基本的注釋在代碼中

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"
#include "device_atomic_functions.h"
#include <cuda.h>
#include "book.h"
#include <cpu_bitmap.h>
#include <stdio.h>

#define DIM 1000		//影像的像素邊長大小

// Minimal complex-number value type used by the Julia-set iteration.
// All operations are const so they can be used on const objects and on the
// const-reference operands of the operators themselves.
struct cuComplex
{
	float r;	// real part
	float i;	// imaginary part

	// Builds the complex number a + b*i.
	cuComplex(float a, float b) : r(a), i(b) {}

	// Returns the squared magnitude r*r + i*i (no square root is taken).
	float magnitude2() const { return r * r + i * i; }

	// Complex product of *this and a.
	cuComplex operator* (const cuComplex& a) const
	{
		return cuComplex(r * a.r - i * a.i, i * a.r + r * a.i);
	}

	// Component-wise complex sum of *this and a.
	cuComplex operator+ (const cuComplex& a) const
	{
		return cuComplex(r + a.r, i + a.i);
	}
};

int julia(int x, int y)
{
	const float scale = 1.5;	//放大倍率
	float jx = scale * (float)(DIM / 2 - x) / (DIM / 2);	//坐標變換，投影到-1~1scale
	float jy = scale * (float)(DIM / 2 - y) / (DIM / 2);
	cuComplex c(-0.8, 0.156);	//基數
	cuComplex a(jx, jy);
	int i = 0;
	for (i = 0; i < 200; i++)	//迭代
	{
		a = a * a + c;
		if (a.magnitude2() > 1000)
			return 0;
	}
	return 1;
}

void kernel(unsigned char* ptr)
{
	for (int y = 0; y < DIM; y++)	//遍歷整個bitmap
	{
		for (int x = 0; x < DIM; x++)
		{
			int offset = x + y * DIM;
			int juliaValue = https://www.cnblogs.com/fishmingee/p/julia(x, y);
			//注意openGL這里的顏色格式是RGBA,000為黑色
			ptr[offset * 4 + 0] = 255 * juliaValue;
			ptr[offset * 4 + 1] = 0;
			ptr[offset * 4 + 2] = 0;
			ptr[offset * 4 + 3] = 255;
		}
	}
}

// Entry point of the CPU version: allocates a DIM x DIM bitmap, fills it with
// the Julia-set image and hands it to the bitmap helper for display.
int main()
{
	CPUBitmap bitmap(DIM, DIM);
	kernel(bitmap.get_ptr());	// render directly into the bitmap's pixel buffer
	bitmap.display_and_exit();	// provided by cpu_bitmap.h
}

GPU Julia Set

注意由于內核函式是 `__global__` 的，要在GPU上運行，需要將其呼叫的julia函式加上 `__device__`；又因為 `__device__` 函式只能由 `__device__` 函式或者 `__global__` 函式呼叫，所以最好把結構體中的所有函式都加上 `__device__`。

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"
#include "device_atomic_functions.h"
#include <cuda.h>
#include "book.h"
#include <cpu_bitmap.h>
#include <stdio.h>

//小于65536
#define DIM 1000		//影像的像素邊長大小

// Minimal complex-number value type used by the Julia-set iteration.
// Every member is __host__ __device__ so the same type works inside kernels
// and in host-side reference code, and every operation is const so it can be
// applied to const objects and const-reference operands.
struct cuComplex
{
	float r;	// real part
	float i;	// imaginary part

	// Builds the complex number a + b*i.
	__host__ __device__ cuComplex(float a, float b) : r(a), i(b) {}

	// Returns the squared magnitude r*r + i*i (no square root is taken).
	__host__ __device__ float magnitude2() const { return r * r + i * i; }

	// Complex product of *this and a.
	__host__ __device__ cuComplex operator* (const cuComplex& a) const
	{
		return cuComplex(r * a.r - i * a.i, i * a.r + r * a.i);
	}

	// Component-wise complex sum of *this and a.
	__host__ __device__ cuComplex operator+ (const cuComplex& a) const
	{
		return cuComplex(r + a.r, i + a.i);
	}
};

// Device-side membership test: checks whether pixel (x, y) of the DIM x DIM
// image stays bounded under the iteration z = z*z + c with c = -0.8 + 0.156i.
// Returns 1 if the squared magnitude never exceeds 1000 within 200
// iterations, otherwise 0.
__device__ int julia(int x, int y)
{
	const float scale = 1.5f;	// zoom factor applied to the mapped coordinates
	const int half = DIM / 2;

	// Shift the pixel so the image centre is the origin, then normalise by
	// half the image size and apply the zoom factor.
	float jx = scale * (float)(half - x) / half;
	float jy = scale * (float)(half - y) / half;

	cuComplex c(-0.8f, 0.156f);	// constant added on every iteration
	cuComplex z(jx, jy);		// iterate, seeded with the pixel's own position

	int step = 0;
	while (step < 200)
	{
		z = z * z + c;
		if (z.magnitude2() > 1000)
		{
			return 0;	// treated as diverged: not in the set
		}
		++step;
	}
	return 1;
}

// Julia-set kernel: each block colours the pixel addressed by its block index.
// Launch layout: a 2D grid with one block per pixel (gridDim.x columns by
// gridDim.y rows). Only blockIdx is read, so one thread per block is enough;
// extra threads in a block would redundantly write the same bytes.
// ptr must be device memory holding at least gridDim.x * gridDim.y * 4 bytes,
// stored row by row as RGBA.
__global__ void kernel(unsigned char* ptr)
{
	int x = blockIdx.x;	// horizontal pixel index (column)
	int y = blockIdx.y;	// vertical pixel index (row)
	int offset = x + y * gridDim.x;	// linear pixel index in row-major order
	int juliaValue = julia(x, y);	// 1 inside the set, 0 outside
	// Points in the set are pure red, everything else is black; alpha is opaque.
	ptr[offset * 4 + 0] = 255 * juliaValue;
	ptr[offset * 4 + 1] = 0;
	ptr[offset * 4 + 2] = 0;
	ptr[offset * 4 + 3] = 255;
}


// Entry point of the GPU version: renders the Julia set on the device, copies
// the image back into a host bitmap and displays it.
int main()
{
	CPUBitmap bitmap(DIM, DIM);
	unsigned char* dev_bitmap;
	// Allocate the device-side image buffer (same size as the host bitmap).
	HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap, bitmap.image_size()));

	// One block per pixel: a DIM x DIM grid of blocks, one thread per block.
	dim3 grid(DIM, DIM);
	kernel<<<grid, 1>>>(dev_bitmap);
	// A kernel launch returns no status itself; launch-configuration errors
	// are only reported through cudaGetLastError().
	HANDLE_ERROR(cudaGetLastError());

	// Blocking copy of the rendered image from device to host memory.
	HANDLE_ERROR(cudaMemcpy(bitmap.get_ptr(), dev_bitmap, bitmap.image_size(), cudaMemcpyDeviceToHost));

	// The host copy is complete, so release the device buffer before handing
	// over to the display helper.
	// NOTE(review): display_and_exit() presumably never returns — confirm in cpu_bitmap.h.
	HANDLE_ERROR(cudaFree(dev_bitmap));
	bitmap.display_and_exit();
}

參考資料

轉載請註明出處，本文鏈接：https://www.uj5u.com/qita/14265.html

標籤：其他

上一篇：Image Retargeting - 影像縮略圖影像重定向

下一篇：qt creator原始碼全方面分析(2-10-2)