8.2. Modifying the Basic CR Solver 127
needed one-to-four solving pass. Phase 1 of the shader therefore starts by
reconstructing the missing data from the unchanged and full-resolution input data
in the same fashion that was used in Listing 8.1. Phase 2 uses this data to
perform several one-to-two solving steps to produce the missing $y_i$ values of the
intermediate pass that we skip. Phase 3 finally uses all that data to produce
the final result. Listing 8.2 shows a Shader Model 4 code fragment
implementing the corresponding algorithm for that final solver stage. Again, only
the code for the horizontal version of the algorithm is shown.
float4 FinalSolveHorz4(PS_INPUT input) : SV_TARGET
{
// First reconstruct the level 1 x, abc.
float fPosX = floor(input.Pos.x * 0.25) * 4.0 + 3.0;
int3 i3LoadPos = int3(fPosX, input.Pos.y, 0);
// Phase 1: Gather data to reconstruct intermediate data
// lost when skipping the first two-to-one reduction step
// of the original solver.
float fCoC_5 = computeCoC(i3LoadPos, int2(-5, 0));
float fCoC_4 = computeCoC(i3LoadPos, int2(-4, 0));
float fCoC_3 = computeCoC(i3LoadPos, int2(-3, 0));
float fCoC_2 = computeCoC(i3LoadPos, int2(-2, 0));
float fCoC_1 = computeCoC(i3LoadPos, int2(-1, 0));
float fCoC0 = computeCoC(i3LoadPos, int2(0, 0));
float fCoC1 = computeCoC(i3LoadPos, int2(1, 0));
float fCoC2 = computeCoC(i3LoadPos, int2(2, 0));
float fCoC3 = computeCoC(i3LoadPos, int2(3, 0));
float fCoC4 = computeCoC(i3LoadPos, int2(4, 0));
fCoC_5 = (fPosX - 5.0 >= 0.0) ? fCoC_5 : 0.0;
fCoC_4 = (fPosX - 4.0 >= 0.0) ? fCoC_4 : 0.0;
fCoC_3 = (fPosX - 3.0 >= 0.0) ? fCoC_3 : 0.0;
fCoC4 = (fPosX + 4.0 < g_vImageSize.x) ? fCoC4: 0.0;
fCoC3 = (fPosX + 3.0 < g_vImageSize.x) ? fCoC3 : 0.0;
fCoC2 = (fPosX + 2.0 < g_vImageSize.x) ? fCoC2 : 0.0;
fCoC1 = (fPosX + 1.0 < g_vImageSize.x) ? fCoC1 : 0.0;
float fRealCoC_5 = min(fCoC_5, fCoC_4);
float fRealCoC_4 = min(fCoC_4, fCoC_3);
float fRealCoC_3 = min(fCoC_3, fCoC_2);