00001 #include "luxrays/kernels/kernels.h"
00002 std::string luxrays::KernelSource_Pixel_AddSampleBufferGaussian2x2 = // OpenCL C source of the PixelAddSampleBufferGaussian2x2 kernel, kept as one concatenated string literal
00003 "/***************************************************************************\n"
00004 " * Copyright (C) 1998-2010 by authors (see AUTHORS.txt ) *\n"
00005 " * *\n"
00006 " * This file is part of LuxRays. *\n"
00007 " * *\n"
00008 " * LuxRays is free software; you can redistribute it and/or modify *\n"
00009 " * it under the terms of the GNU General Public License as published by *\n"
00010 " * the Free Software Foundation; either version 3 of the License, or *\n"
00011 " * (at your option) any later version. *\n"
00012 " * *\n"
00013 " * LuxRays is distributed in the hope that it will be useful, *\n"
00014 " * but WITHOUT ANY WARRANTY; without even the implied warranty of *\n"
00015 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *\n"
00016 " * GNU General Public License for more details. *\n"
00017 " * *\n"
00018 " * You should have received a copy of the GNU General Public License *\n"
00019 " * along with this program. If not, see <http://www.gnu.org/licenses/>. *\n"
00020 " * *\n"
00021 " * LuxRays website: http://www.luxrender.net *\n"
00022 " ***************************************************************************/\n"
00023 "\n"
00024 "// NOTE: this kernel assume samples do not overlap\n"
00025 "\n"
00026 "#define FILTER_TABLE_SIZE 16\n" // the filter table is indexed as a FILTER_TABLE_SIZE x FILTER_TABLE_SIZE array of floats
00027 "#define Gaussian2x2_xWidth 2.f\n" // half-extent of the filter footprint along x, in the units of screenX
00028 "#define Gaussian2x2_yWidth 2.f\n" // half-extent of the filter footprint along y, in the units of screenY
00029 "#define Gaussian2x2_invXWidth (1.f / Gaussian2x2_xWidth)\n"
00030 "#define Gaussian2x2_invYWidth (1.f / Gaussian2x2_yWidth)\n" // must be the reciprocal of the y half-extent so that |y - dImageY| * invYWidth lies in [0, 1]
00031 "\n"
00032 "typedef struct {\n"
00033 " float r, g, b;\n"
00034 "} Spectrum;\n"
00035 "\n"
00036 "typedef struct {\n" // four floats in total (r, g, b, weight); AddSample accesses it as a single float4
00037 " Spectrum radiance;\n"
00038 " float weight;\n"
00039 "} SamplePixel;\n"
00040 "\n"
00041 "typedef struct {\n" // one input sample: screen position plus its radiance
00042 " float screenX, screenY;\n"
00043 " Spectrum radiance;\n"
00044 "} SampleBufferElem;\n"
00045 "\n"
00046 "int Ceil2Int(const float val) {\n" // ceil() followed by truncation to int
00047 " return (int)ceil(val);\n"
00048 "}\n"
00049 "\n"
00050 "int Floor2Int(const float val) {\n" // floor() followed by truncation to int
00051 " return (int)floor(val);\n"
00052 "}\n"
00053 "\n"
00054 "void AddSample(__global SamplePixel *sp, const float4 sample) {\n" // sample = (r, g, b, w): accumulates (r*w, g*w, b*w, w) into *sp
00055 " float4 weight = (float4)(sample.w, sample.w, sample.w, 1.f);\n" // the trailing 1.f leaves w itself unscaled
00056 " __global float4 *p = (__global float4 *)sp;\n" // NOTE(review): assumes SamplePixel entries are float4-aligned - confirm with the buffer allocation
00057 " *p += weight * sample;\n" // plain (non-atomic) read-modify-write; see the NOTE above about non-overlapping samples
00058 "}\n"
00059 "\n"
00060 "__kernel __attribute__((reqd_work_group_size(64, 1, 1))) void PixelAddSampleBufferGaussian2x2(\n" // one work-item per entry of sampleBuff
00061 " const unsigned int width,\n"
00062 " const unsigned int height,\n"
00063 " __global SamplePixel *sampleFrameBuffer,\n" // indexed as y * width + x
00064 " const unsigned int sampleCount,\n"
00065 " __global SampleBufferElem *sampleBuff,\n"
00066 " __constant __attribute__((max_constant_size(sizeof(float) * FILTER_TABLE_SIZE * FILTER_TABLE_SIZE))) float *Gaussian2x2_filterTable) {\n"
00067 " const unsigned int index = get_global_id(0);\n"
00068 " if (index >= sampleCount)\n" // work-items beyond the end of the sample buffer have nothing to do
00069 " return;\n"
00070 "\n"
00071 " __global SampleBufferElem *sampleElem = &sampleBuff[index];\n"
00072 " float4 sample = (float4)(sampleElem->radiance.r, sampleElem->radiance.g, sampleElem->radiance.b, 1.f);\n" // not const: .w is overwritten with the filter weight for every pixel below
00073 "\n"
00074 " const float dImageX = sampleElem->screenX - 0.5f;\n" // NOTE(review): presumably shifts to pixel-centre coordinates - confirm
00075 " const float dImageY = sampleElem->screenY - 0.5f;\n"
00076 " const int x0 = Ceil2Int(dImageX - Gaussian2x2_xWidth);\n" // [x0, x1] x [y0, y1] is the integer pixel range covered by the filter footprint
00077 " const int x1 = Floor2Int(dImageX + Gaussian2x2_xWidth);\n"
00078 " const int y0 = Ceil2Int(dImageY - Gaussian2x2_yWidth);\n"
00079 " const int y1 = Floor2Int(dImageY + Gaussian2x2_yWidth);\n"
00080 " if (x1 < x0 || y1 < y0 || x1 < 0 || y1 < 0)\n" // empty footprint, or footprint entirely at negative x or y
00081 " return;\n"
00082 "\n"
00083 " // Loop over filter support and add sample to pixel arrays\n"
00084 " __local int ifxBuff[FILTER_TABLE_SIZE * 64];\n" // 64 matches reqd_work_group_size: FILTER_TABLE_SIZE ints of scratch per work-item
00085 " __local int *ifx = &(ifxBuff[FILTER_TABLE_SIZE * get_local_id(0)]);\n" // this work-item's private slice of the scratch buffer
00086 " for (int x = x0; x <= x1; ++x) {\n"
00087 " const float fx = fabs((x - dImageX) *\n"
00088 " Gaussian2x2_invXWidth * FILTER_TABLE_SIZE);\n"
00089 " ifx[x - x0] = min(Floor2Int(fx), (int)FILTER_TABLE_SIZE - 1);\n" // clamp to the last table column
00090 " }\n"
00091 "\n"
00092 " __local int ifyBuff[FILTER_TABLE_SIZE * 64];\n"
00093 " __local int *ify = &(ifyBuff[FILTER_TABLE_SIZE * get_local_id(0)]);\n"
00094 " for (int y = y0; y <= y1; ++y) {\n"
00095 " const float fy = fabs((y - dImageY) *\n"
00096 " Gaussian2x2_invYWidth * FILTER_TABLE_SIZE);\n"
00097 " ify[y - y0] = min(Floor2Int(fy), (int)FILTER_TABLE_SIZE - 1);\n" // clamp to the last table row
00098 " }\n"
00099 "\n"
00100 " float filterNorm = 0.f;\n" // sum of the table weights over the whole, unclipped footprint
00101 " for (int y = y0; y <= y1; ++y) {\n"
00102 " for (int x = x0; x <= x1; ++x) {\n"
00103 " const int offset = ify[y - y0] * FILTER_TABLE_SIZE + ifx[x - x0];\n"
00104 " filterNorm += Gaussian2x2_filterTable[offset];\n"
00105 " }\n"
00106 " }\n"
00107 " filterNorm = 1.f / filterNorm;\n" // scale factor that makes the footprint weights sum to one
00108 "\n"
00109 " const int fx0 = max(x0, 0);\n" // clip the footprint to the image rectangle [0, width) x [0, height)
00110 " const int fx1 = min(x1, (int)width - 1);\n"
00111 " const int fy0 = max(y0, 0);\n"
00112 " const int fy1 = min(y1, (int)height - 1);\n"
00113 "\n"
00114 " for (int y = fy0; y <= fy1; ++y) {\n" // the loops are empty when the footprint lies past the right or bottom edge
00115 " const unsigned int offset = y * width;\n"
00116 "\n"
00117 " for (int x = fx0; x <= fx1; ++x) {\n"
00118 " const int tabOffset = ify[y - y0] * FILTER_TABLE_SIZE + ifx[x - x0];\n" // indices stay relative to the unclipped origin (x0, y0)
00119 " sample.w = Gaussian2x2_filterTable[tabOffset] * filterNorm;\n"
00120 "\n"
00121 " AddSample(&sampleFrameBuffer[offset + x], sample);\n"
00122 " }\n"
00123 " }\n"
00124 "}\n"
00125 ;