8. Implementing a Fast DDOF Solver (2/4)

Search in book...
Toggle Font Controls
Create new playlist

Name your new playlist

Playlist description (optional)
Sign In

Email address

Password

Forgot Password?

or

Continue with Facebook

Continue with Google
Sign Up

Full Name

Email address

Confirm Email Address

Password

or

Continue with Facebook

Continue with Google

124 8. Implementing a Fast DDOF Solver

// Result of a reduction pass, written to two render targets at once.
struct ReduceO
{
    float4 abc : SV_TARGET0; // reduced (a, b, c) coefficients in xyz
    float4 x   : SV_TARGET1; // reduced x value in xyz
};

// Listing 8.1: horizontal four-to-one reduction pass.
//
// For each output pixel this shader rebuilds the (a, b, c) coefficients and
// loads the x values of the seven input equations at offsets -3..+3 around
// the anchor texel, then collapses them with two levels of two-to-one
// reductions into a single equation.
//
// input:   pixel shader input; only input.Pos.xy is read.
// returns: reduced (a, b, c) in output.abc.xyz and reduced x in output.x.xyz;
//          both w components are set to 0.
ReduceO InitialReduceHorz4(PS_INPUT input)
{
    ReduceO output;

    // Output pixel n is anchored on input texel 4n + 3.
    float fPosX = floor(input.Pos.x) * 4.0 + 3.0;
    int3 i3LP = int3(fPosX, input.Pos.y, 0);

    // Phase 1: Gather and compute all data necessary
    // for the four-to-one reduction.

    // Compute the CoC values in the support needed for
    // the four-to-one reduction.
    float fCoC_4 = computeCoC(i3LP, int2(-4, 0));
    float fCoC_3 = computeCoC(i3LP, int2(-3, 0));
    float fCoC_2 = computeCoC(i3LP, int2(-2, 0));
    float fCoC_1 = computeCoC(i3LP, int2(-1, 0));
    float fCoC0  = computeCoC(i3LP, int2( 0, 0));
    float fCoC1  = computeCoC(i3LP, int2( 1, 0));
    float fCoC2  = computeCoC(i3LP, int2( 2, 0));
    float fCoC3  = computeCoC(i3LP, int2( 3, 0));
    float fCoC4  = computeCoC(i3LP, int2( 4, 0));

    // Ensure insulation at the image borders by setting
    // the CoC to 0 outside the image.
    fCoC_4 = (fPosX - 4.0 >= 0.0) ? fCoC_4 : 0.0;
    fCoC_3 = (fPosX - 3.0 >= 0.0) ? fCoC_3 : 0.0;
    fCoC4  = (fPosX + 4.0) < g_vImageSize.x ? fCoC4 : 0.0;
    fCoC3  = (fPosX + 3.0) < g_vImageSize.x ? fCoC3 : 0.0;
    fCoC2  = (fPosX + 2.0) < g_vImageSize.x ? fCoC2 : 0.0;
    fCoC1  = (fPosX + 1.0) < g_vImageSize.x ? fCoC1 : 0.0;

    // Use the minimum CoC as the real CoC as described in Kass et al.
    // Each value couples a texel with its right-hand neighbor.
    float fRealCoC_4 = min(fCoC_4, fCoC_3);
    float fRealCoC_3 = min(fCoC_3, fCoC_2);
    float fRealCoC_2 = min(fCoC_2, fCoC_1);
    float fRealCoC_1 = min(fCoC_1, fCoC0);
    float fRealCoC0  = min(fCoC0,  fCoC1);
    float fRealCoC1  = min(fCoC1,  fCoC2);
    float fRealCoC2  = min(fCoC2,  fCoC3);
    float fRealCoC3  = min(fCoC3,  fCoC4);

    // Compute beta values interpreting the CoCs as the diameter.
    float bt_4 = fRealCoC_4 * fRealCoC_4;
    float bt_3 = fRealCoC_3 * fRealCoC_3;
    float bt_2 = fRealCoC_2 * fRealCoC_2;
    float bt_1 = fRealCoC_1 * fRealCoC_1;
    float bt0  = fRealCoC0  * fRealCoC0;
    float bt1  = fRealCoC1  * fRealCoC1;
    float bt2  = fRealCoC2  * fRealCoC2;
    float bt3  = fRealCoC3  * fRealCoC3;

    // Now compute the a, b, c and load the x in the support
    // region of the four-to-one reduction.
    // The b entry is always >= 1, so the divisions below are safe.
    float3 abc_3 = float3(-bt_4, 1.0 + bt_3 + bt_4, -bt_3);
    float3 x_3   = txX.Load(i3LP, int2(-3, 0)).xyz;

    float3 abc_2 = float3(-bt_3, 1.0 + bt_2 + bt_3, -bt_2);
    float3 x_2   = txX.Load(i3LP, int2(-2, 0)).xyz;

    float3 abc_1 = float3(-bt_2, 1.0 + bt_1 + bt_2, -bt_1);
    float3 x_1   = txX.Load(i3LP, int2(-1, 0)).xyz;

    float3 abc0  = float3(-bt_1, 1.0 + bt0 + bt_1, -bt0);
    // Fixed: the printed listing dropped the '(' after Load here.
    float3 x0    = txX.Load(i3LP, int2(0, 0)).xyz;

    float3 abc1  = float3(-bt0, 1.0 + bt1 + bt0, -bt1);
    float3 x1    = txX.Load(i3LP, int2(1, 0)).xyz;

    float3 abc2  = float3(-bt1, 1.0 + bt2 + bt1, -bt2);
    float3 x2    = txX.Load(i3LP, int2(2, 0)).xyz;

    float3 abc3  = float3(-bt2, 1.0 + bt3 + bt2, -bt3);
    float3 x3    = txX.Load(i3LP, int2(3, 0)).xyz;

    // Phase 2: Reduce all the data by doing all two-to-one
    // reductions to get to the next reduction level.
    // Equations -2, 0 and +2 each absorb their two direct neighbors.
    float a_1 = -abc_2.x / abc_3.y;
    float g_1 = -abc_2.z / abc_1.y;
    float a0  = -abc0.x  / abc_1.y;
    float g0  = -abc0.z  / abc1.y;
    float a1  = -abc2.x  / abc1.y;
    float g1  = -abc2.z  / abc3.y;

    // Previous (offset -2), center (offset 0) and next (offset +2) equations.
    float3 abc_p = float3(a_1 * abc_3.x,
                          abc_2.y + a_1 * abc_3.z + g_1 * abc_1.x,
                          g_1 * abc_1.z);
    float3 x_p   = x_2 + a_1 * x_3 + g_1 * x_1;

    float3 abc_c = float3(a0 * abc_1.x,
                          abc0.y + a0 * abc_1.z + g0 * abc1.x,
                          g0 * abc1.z);
    float3 x_c   = x0 + a0 * x_1 + g0 * x1;

    float3 abc_n = float3(a1 * abc1.x,
                          abc2.y + a1 * abc1.z + g1 * abc3.x,
                          g1 * abc3.z);
    float3 x_n   = x2 + a1 * x1 + g1 * x3;

    // Phase 3: Do the final two-to-one reduction to complete
    // the four-to-one reduction.
    float a = -abc_c.x / abc_p.y;
    float g = -abc_c.z / abc_n.y;

    float3 res0 = float3(a * abc_p.x,
                         abc_c.y + a * abc_p.z + g * abc_n.x,
                         g * abc_n.z);
    float3 res1 = x_c + a * x_p + g * x_n;

    output.abc = float4(res0, 0.0);
    output.x   = float4(res1, 0.0);

    return output;
}

Listing 8.1. Horizontal four-to-one reduction.

3. Perform a final one-to-four solving pass to deal with the initial four-to-one
reduction pass. Again, a very hands-on approach for solving the problem at
hand is used, and it also has three phases. Since an initial four-to-one
reduction shader was used, we don’t have all the data available to perform the

8.2 Modifying the Basic CR Solver 127

needed one-to-four solving pass. Phase 1 of the shader therefore starts to
reconstruct the missing data from the unchanged and full-resolution input data
in the same fashion that was used in Listing 8.1. Phase 2 uses this data to
perform several one-to-two solving steps to produce the missing values of the
intermediate pass that we skip. Phase 3 finally uses all that data to produce
the final result. Listing 8.2 shows a shader model 4 code fragment
implementing the corresponding algorithm for that final solver stage. Again, only
the code for the horizontal version of the algorithm is shown.

float4 FinalSolveHorz4(PS_INPUT input) : SV_TARGET

{

// First reconstruct the level 1 x, abc.

float fPosX = floor(input.Pos.x * 0.25) * 4.0 + 3.0;

int3 i3LoadPos = int3(fPosX, input.Pos.y, 0);

// Phase 1: Gather data to reconstruct intermediate data

// lost when skipping the first two-to-one reduction step

// of the original solver.

float fCoC_5 = computeCoC(i3LoadPos, int2(-5, 0));

float fCoC_4 = computeCoC(i3LoadPos, int2(-4, 0));

float fCoC_3 = computeCoC(i3LoadPos, int2(-3, 0));

float fCoC_2 = computeCoC(i3LoadPos, int2(-2, 0));

float fCoC_1 = computeCoC(i3LoadPos, int2(-1, 0));

float fCoC0 = computeCoC(i3LoadPos, int2(0, 0));

float fCoC1 = computeCoC(i3LoadPos, int2(1, 0));

float fCoC2 = computeCoC(i3LoadPos, int2(2, 0));

float fCoC3 = computeCoC(i3LoadPos, int2(3, 0));

float fCoC4 = computeCoC(i3LoadPos, int2(4, 0));

fCoC_5 = (fPosX - 5.0 >= 0.0) ? fCoC_5 : 0.0;

fCoC_4 = (fPosX - 4.0 >= 0.0) ? fCoC_4 : 0.0;

fCoC_3 = (fPosX - 3.0 >= 0.0) ? fCoC_3 : 0.0;

fCoC4 = (fPosX + 4.0 < g_vImageSize.x) ? fCoC4: 0.0;

fCoC3 = (fPosX + 3.0 < g_vImageSize.x) ? fCoC3 : 0.0;

fCoC2 = (fPosX + 2.0 < g_vImageSize.x) ? fCoC2 : 0.0;

fCoC1 = (fPosX + 1.0 < g_vImageSize.x) ? fCoC1 : 0.0;

float fRealCoC_5 = min(fCoC_5, fCoC_4);

float fRealCoC_4 = min(fCoC_4, fCoC_3);

float fRealCoC_3 = min(fCoC_3, fCoC_2);

128 8.ImplementingaFastDDOFSolver

float fRealCoC_2 = min(fCoC_2, fCoC_1);

float fRealCoC_1 = min(fCoC_1, fCoC0);

float fRealCoC0 = min(fCoC0, fCoC1);

float fRealCoC1 = min(fCoC1, fCoC2);

float fRealCoC2 = min(fCoC2, fCoC3);

float fRealCoC3 = min(fCoC3, fCoC4);

float b_5 = fRealCoC_5 * fRealCoC_5;

float b_4 = fRealCoC_4 * fRealCoC_4;

float b_3 = fRealCoC_3 * fRealCoC_3;

float b_2 = fRealCoC_2 * fRealCoC_2;

float b_1 = fRealCoC_1 * fRealCoC_1;

float b0 = fRealCoC0 * fRealCoC0;

float b1 = fRealCoC1 * fRealCoC1;

float b2 = fRealCoC2 * fRealCoC2;

float b3 = fRealCoC3 * fRealCoC3;

float3 abc_4 = float3(-b_5, 1.0 + b_4 + b_5, -b_4);

float3 x_4 = txX.Load(i3LoadPos, int2(-4, 0)).xyz;

float3 abc_3 = float3(-b_4, 1.0 + b_3 + b_4, -b_3);

float3 x_3 = txX.Load(i3LoadPos, int2(-3, 0)).xyz;

float3 abc_2 = float3(-b_3, 1.0 + b_2 + b_3, -b_2);

float3 x_2 = txX.Load(i3LoadPos, int2(-2, 0)).xyz;

float3 abc_1 = float3(-b_2, 1.0 + b_1 + b_2, -b_1);

float3 x_1 = xX.Load(i3LP, int2(-1, 0)).xyz;

float3 abc0 = float3(-b_1, 1.0 + b0 + b_1, -b0);

float3 x0 = txX.Load(i3LP, int2(0, 0)).xyz;

float3 abc1 = float3(-b0, 1.0 + b1 + b0, -b1);

float3 x1 = txX.Load(i3LP, int2(1, 0)).xyz;

float3 abc2 = float3(-b1, 1.0 + b2 + b1, -b2);

float3 x2 = txX.Load(i3LP, int2(2, 0)).xyz;

float3 abc3 = float3(-b2, 1.0 + b3 + b2, -b3);

float3 x3 = txX.Load(i3LP, int2(3, 0)).xyz;

..................Content has been hidden....................

You can't read the whole page of this ebook. Please click here to log in and view the full page.

Table of Contents for 8. Implementing a Fast DDOF Solver (2/4)

Create new playlist

Sign In

Sign Up

Table of Contents for
8. Implementing a Fast DDOF Solver (2/4)