// Each work-item will filter around its start location
// (starting from half the filter size left and up)
// Loop form of the filter: visits filterWidth rows and filterWidth columns
// of localImage, starting at row localRow and column localCol.
for(int i = localRow; i < localRow+filterWidth; i++) {
// Index of the first element of row i (consecutive rows are localWidth apart)
int offset = i*localWidth;
for(int j = localCol; j < localCol+filterWidth; j++){
// NOTE(review): this statement is cut off in this listing; its right-hand
// operand and the closing braces of both loops are not visible here.
sum += localImage[offset+j] *
// Row loop with the column loop unrolled by hand: for each row, seven
// consecutive localImage elements are multiplied with seven consecutive
// filter coefficients and accumulated into sum.
// NOTE(review): seven statements per row presumably means filterWidth == 7 — confirm.
for(int i = localRow; i < localRow+filterWidth; i++) {
// Start of this row's window: row i, column localCol
int offset = i*localWidth+localCol;
// Each statement post-increments both indices, so the next statement reads
// the next column and the next filter coefficient.
sum += localImage[offset++] * filter[filterIdx++];
sum += localImage[offset++] * filter[filterIdx++];
sum += localImage[offset++] * filter[filterIdx++];
sum += localImage[offset++] * filter[filterIdx++];
sum += localImage[offset++] * filter[filterIdx++];
sum += localImage[offset++] * filter[filterIdx++];
sum += localImage[offset++] * filter[filterIdx++];
// NOTE(review): the closing brace of this loop is not visible in this listing.
// Fully unrolled 7x7 filter: 49 multiply-accumulates with no loop overhead.
// offset is the index in localImage of the first element of the current
// window row; offset+0 .. offset+6 are the seven columns of that row.
// filterIdx advances by one per tap, so the filter is consumed row by row.
// After each row, offset must move down by one row (localWidth elements);
// without that step every group would re-read the first row of the window.
int offset = localRow*localWidth+localCol;

// Window row 0
sum += localImage[offset+0] * filter[filterIdx++];
sum += localImage[offset+1] * filter[filterIdx++];
sum += localImage[offset+2] * filter[filterIdx++];
sum += localImage[offset+3] * filter[filterIdx++];
sum += localImage[offset+4] * filter[filterIdx++];
sum += localImage[offset+5] * filter[filterIdx++];
sum += localImage[offset+6] * filter[filterIdx++];
offset += localWidth;

// Window row 1
sum += localImage[offset+0] * filter[filterIdx++];
sum += localImage[offset+1] * filter[filterIdx++];
sum += localImage[offset+2] * filter[filterIdx++];
sum += localImage[offset+3] * filter[filterIdx++];
sum += localImage[offset+4] * filter[filterIdx++];
sum += localImage[offset+5] * filter[filterIdx++];
sum += localImage[offset+6] * filter[filterIdx++];
offset += localWidth;

// Window row 2
sum += localImage[offset+0] * filter[filterIdx++];
sum += localImage[offset+1] * filter[filterIdx++];
sum += localImage[offset+2] * filter[filterIdx++];
sum += localImage[offset+3] * filter[filterIdx++];
sum += localImage[offset+4] * filter[filterIdx++];
sum += localImage[offset+5] * filter[filterIdx++];
sum += localImage[offset+6] * filter[filterIdx++];
offset += localWidth;

// Window row 3
sum += localImage[offset+0] * filter[filterIdx++];
sum += localImage[offset+1] * filter[filterIdx++];
sum += localImage[offset+2] * filter[filterIdx++];
sum += localImage[offset+3] * filter[filterIdx++];
sum += localImage[offset+4] * filter[filterIdx++];
sum += localImage[offset+5] * filter[filterIdx++];
sum += localImage[offset+6] * filter[filterIdx++];
offset += localWidth;

// Window row 4
sum += localImage[offset+0] * filter[filterIdx++];
sum += localImage[offset+1] * filter[filterIdx++];
sum += localImage[offset+2] * filter[filterIdx++];
sum += localImage[offset+3] * filter[filterIdx++];
sum += localImage[offset+4] * filter[filterIdx++];
sum += localImage[offset+5] * filter[filterIdx++];
sum += localImage[offset+6] * filter[filterIdx++];
offset += localWidth;

// Window row 5
sum += localImage[offset+0] * filter[filterIdx++];
sum += localImage[offset+1] * filter[filterIdx++];
sum += localImage[offset+2] * filter[filterIdx++];
sum += localImage[offset+3] * filter[filterIdx++];
sum += localImage[offset+4] * filter[filterIdx++];
sum += localImage[offset+5] * filter[filterIdx++];
sum += localImage[offset+6] * filter[filterIdx++];
offset += localWidth;

// Window row 6 (last row: no further advance needed)
sum += localImage[offset+0] * filter[filterIdx++];
sum += localImage[offset+1] * filter[filterIdx++];
sum += localImage[offset+2] * filter[filterIdx++];
sum += localImage[offset+3] * filter[filterIdx++];
sum += localImage[offset+4] * filter[filterIdx++];
sum += localImage[offset+5] * filter[filterIdx++];
sum += localImage[offset+6] * filter[filterIdx++];
// Store the accumulated sum in imageOut, indexed as row*cols + column.
// Both the row (globalRow) and the column (globalCol) are shifted by
// filterRadius before indexing.
// NOTE(review): presumably the shift places the result at the centre of the
// filter window rather than at its top-left corner — confirm against the
// definitions of globalRow/globalCol, which are not visible here.
imageOut[(globalRow+filterRadius)*cols +
(globalCol+filterRadius)] = sum;