/*********************************************************** * A single header file OpenGL lightmapping library * * https://github.com/ands/lightmapper * * no warranty implied | use at your own risk * * author: Andreas Mantler (ands) | last change: 10.05.2018 * * * * License: * * This software is in the public domain. * * Where that dedication is not recognized, * * you are granted a perpetual, irrevocable license to copy * * and modify this file however you want. * ***********************************************************/ #ifndef LIGHTMAPPER_H #define LIGHTMAPPER_H #ifdef __cplusplus #define LM_DEFAULT_VALUE(value) = value #else #define LM_DEFAULT_VALUE(value) #endif #ifndef LM_CALLOC #define LM_CALLOC(count, size) calloc(count, size) #endif #ifndef LM_FREE #define LM_FREE(ptr) free(ptr) #endif typedef int lm_bool; #define LM_FALSE 0 #define LM_TRUE 1 typedef int lm_type; #define LM_NONE 0 #define LM_UNSIGNED_BYTE GL_UNSIGNED_BYTE #define LM_UNSIGNED_SHORT GL_UNSIGNED_SHORT #define LM_UNSIGNED_INT GL_UNSIGNED_INT #define LM_FLOAT GL_FLOAT typedef struct lm_context lm_context; // creates a lightmapper instance. it can be used to render multiple lightmaps. lm_context *lmCreate( int hemisphereSize, // hemisphereSize: resolution of the hemisphere renderings. must be a power of two! typical: 64. float zNear, float zFar, // zNear/zFar: hemisphere min/max draw distances. float clearR, float clearG, float clearB, // clear color / background color / sky color. int interpolationPasses, float interpolationThreshold, // passes: hierarchical selective interpolation passes (0-8; initial step size = 2^passes). // threshold: error value below which lightmap pixels are interpolated instead of rendered. // use output image from LM_DEBUG_INTERPOLATION to determine a good value. // values around and below 0.01 are probably ok. // the lower the value, the more hemispheres are rendered -> slower, but possibly better quality. float cameraToSurfaceDistanceModifier LM_DEFAULT_VALUE(0.0f)); // modifier for the height of the rendered hemispheres above the surface // -1.0f => stick to surface, 0.0f => minimum height for interpolated surface normals, // > 0.0f => improves gradients on surfaces with interpolated normals due to the flat surface horizon, // but may introduce other artifacts. // optional: set material characteristics by specifying cos(theta)-dependent weights for incoming light. typedef float (*lm_weight_func)(float cos_theta, void *userdata); void lmSetHemisphereWeights(lm_context *ctx, lm_weight_func f, void *userdata); // precalculates weights for incoming light depending on its angle. (default: all weights are 1.0f) // specify an output lightmap image buffer with w * h * c * sizeof(float) bytes of memory. void lmSetTargetLightmap(lm_context *ctx, float *outLightmap, int w, int h, int c); // output HDR lightmap (linear 32bit float channels; c: 1->Greyscale, 2->Greyscale+Alpha, 3->RGB, 4->RGBA). // set the geometry to map to the currently set target lightmap (set the target lightmap before calling this!). void lmSetGeometry(lm_context *ctx, const float *transformationMatrix, // 4x4 object-to-world transform for the geometry or NULL (no transformation). lm_type positionsType, const void *positionsXYZ, int positionsStride, // triangle mesh in object space. lm_type normalsType, const void *normalsXYZ, int normalsStride, // optional normals for the mesh in object space (Use LM_NONE type in case you only need flat surfaces). lm_type lightmapCoordsType, const void *lightmapCoordsUV, int lightmapCoordsStride, // lightmap atlas texture coordinates for the mesh [0..1]x[0..1] (integer types are normalized to 0..1 range). int count, lm_type indicesType LM_DEFAULT_VALUE(LM_NONE), const void *indices LM_DEFAULT_VALUE(0));// if mesh indices are used, count = number of indices else count = number of vertices. // as long as lmBegin returns true, the scene has to be rendered with the // returned camera and view parameters to the currently bound framebuffer. // if lmBegin returns true, it must be followed by lmEnd after rendering! lm_bool lmBegin(lm_context *ctx, int* outViewport4, // output of the current viewport: { x, y, w, h }. use these to call glViewport()! float* outView4x4, // output of the current camera view matrix. float* outProjection4x4); // output of the current camera projection matrix. float lmProgress(lm_context *ctx); // should only be called between lmBegin/lmEnd! // provides the light mapping progress as a value increasing from 0.0 to 1.0. void lmEnd(lm_context *ctx); // destroys the lightmapper instance. should be called to free resources. void lmDestroy(lm_context *ctx); // image based post processing (c is the number of color channels in the image, m a channel mask for the operation) #define LM_ALL_CHANNELS 0x0f float lmImageMin(const float *image, int w, int h, int c, int m LM_DEFAULT_VALUE(LM_ALL_CHANNELS)); // find the minimum value (across the specified channels) float lmImageMax(const float *image, int w, int h, int c, int m LM_DEFAULT_VALUE(LM_ALL_CHANNELS)); // find the maximum value (across the specified channels) void lmImageAdd(float *image, int w, int h, int c, float value, int m LM_DEFAULT_VALUE(LM_ALL_CHANNELS)); // in-place add to the specified channels void lmImageScale(float *image, int w, int h, int c, float factor, int m LM_DEFAULT_VALUE(LM_ALL_CHANNELS)); // in-place scaling of the specified channels void lmImagePower(float *image, int w, int h, int c, float exponent, int m LM_DEFAULT_VALUE(LM_ALL_CHANNELS)); // in-place powf(v, exponent) of the specified channels (for gamma) void lmImageDilate(const float *image, float *outImage, int w, int h, int c); // widen the populated non-zero areas by 1 pixel. void lmImageSmooth(const float *image, float *outImage, int w, int h, int c); // simple box filter on only the non-zero values. void lmImageDownsample(const float *image, float *outImage, int w, int h, int c); // downsamples [0..w]x[0..h] to [0..w/2]x[0..h/2] by avereging only the non-zero values void lmImageFtoUB(const float *image, unsigned char *outImage, int w, int h, int c, float max LM_DEFAULT_VALUE(0.0f)); // casts a floating point image to an 8bit/channel image // TGA file output helpers lm_bool lmImageSaveTGAub(const char *filename, const unsigned char *image, int w, int h, int c); lm_bool lmImageSaveTGAf(const char *filename, const float *image, int w, int h, int c, float max LM_DEFAULT_VALUE(0.0f)); #endif ////////////////////// END OF HEADER ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #ifdef LIGHTMAPPER_IMPLEMENTATION #undef LIGHTMAPPER_IMPLEMENTATION #include #include #include #include #include #include #define LM_SWAP(type, a, b) { type tmp = (a); (a) = (b); (b) = tmp; } #if defined(_MSC_VER) && !defined(__cplusplus) #define inline __inline #endif #if defined(_MSC_VER) && (_MSC_VER <= 1700) static inline lm_bool lm_finite(float a) { return _finite(a); } #else static inline lm_bool lm_finite(float a) { return isfinite(a); } #endif static inline int lm_mini (int a, int b) { return a < b ? a : b; } static inline int lm_maxi (int a, int b) { return a > b ? a : b; } static inline int lm_absi (int a ) { return a < 0 ? -a : a; } static inline float lm_minf (float a, float b) { return a < b ? a : b; } static inline float lm_maxf (float a, float b) { return a > b ? a : b; } static inline float lm_absf (float a ) { return a < 0.0f ? -a : a; } static inline float lm_pmodf (float a, float b) { return (a < 0.0f ? 1.0f : 0.0f) + (float)fmod(a, b); } // positive mod typedef struct lm_ivec2 { int x, y; } lm_ivec2; static inline lm_ivec2 lm_i2 (int x, int y) { lm_ivec2 v = { x, y }; return v; } typedef struct lm_vec2 { float x, y; } lm_vec2; static inline lm_vec2 lm_v2i (int x, int y) { lm_vec2 v = { (float)x, (float)y }; return v; } static inline lm_vec2 lm_v2 (float x, float y) { lm_vec2 v = { x, y }; return v; } static inline lm_vec2 lm_negate2 (lm_vec2 a ) { return lm_v2(-a.x, -a.y); } static inline lm_vec2 lm_add2 (lm_vec2 a, lm_vec2 b) { return lm_v2(a.x + b.x, a.y + b.y); } static inline lm_vec2 lm_sub2 (lm_vec2 a, lm_vec2 b) { return lm_v2(a.x - b.x, a.y - b.y); } static inline lm_vec2 lm_mul2 (lm_vec2 a, lm_vec2 b) { return lm_v2(a.x * b.x, a.y * b.y); } static inline lm_vec2 lm_scale2 (lm_vec2 a, float b) { return lm_v2(a.x * b, a.y * b); } static inline lm_vec2 lm_div2 (lm_vec2 a, float b) { return lm_scale2(a, 1.0f / b); } static inline lm_vec2 lm_pmod2 (lm_vec2 a, float b) { return lm_v2(lm_pmodf(a.x, b), lm_pmodf(a.y, b)); } static inline lm_vec2 lm_min2 (lm_vec2 a, lm_vec2 b) { return lm_v2(lm_minf(a.x, b.x), lm_minf(a.y, b.y)); } static inline lm_vec2 lm_max2 (lm_vec2 a, lm_vec2 b) { return lm_v2(lm_maxf(a.x, b.x), lm_maxf(a.y, b.y)); } static inline lm_vec2 lm_abs2 (lm_vec2 a ) { return lm_v2(lm_absf(a.x), lm_absf(a.y)); } static inline lm_vec2 lm_floor2 (lm_vec2 a ) { return lm_v2(floorf(a.x), floorf(a.y)); } static inline lm_vec2 lm_ceil2 (lm_vec2 a ) { return lm_v2(ceilf (a.x), ceilf (a.y)); } static inline float lm_dot2 (lm_vec2 a, lm_vec2 b) { return a.x * b.x + a.y * b.y; } static inline float lm_cross2 (lm_vec2 a, lm_vec2 b) { return a.x * b.y - a.y * b.x; } // pseudo cross product static inline float lm_length2sq (lm_vec2 a ) { return a.x * a.x + a.y * a.y; } static inline float lm_length2 (lm_vec2 a ) { return sqrtf(lm_length2sq(a)); } static inline lm_vec2 lm_normalize2(lm_vec2 a ) { return lm_div2(a, lm_length2(a)); } static inline lm_bool lm_finite2 (lm_vec2 a ) { return lm_finite(a.x) && lm_finite(a.y); } typedef struct lm_vec3 { float x, y, z; } lm_vec3; static inline lm_vec3 lm_v3 (float x, float y, float z) { lm_vec3 v = { x, y, z }; return v; } static inline lm_vec3 lm_negate3 (lm_vec3 a ) { return lm_v3(-a.x, -a.y, -a.z); } static inline lm_vec3 lm_add3 (lm_vec3 a, lm_vec3 b) { return lm_v3(a.x + b.x, a.y + b.y, a.z + b.z); } static inline lm_vec3 lm_sub3 (lm_vec3 a, lm_vec3 b) { return lm_v3(a.x - b.x, a.y - b.y, a.z - b.z); } static inline lm_vec3 lm_mul3 (lm_vec3 a, lm_vec3 b) { return lm_v3(a.x * b.x, a.y * b.y, a.z * b.z); } static inline lm_vec3 lm_scale3 (lm_vec3 a, float b) { return lm_v3(a.x * b, a.y * b, a.z * b); } static inline lm_vec3 lm_div3 (lm_vec3 a, float b) { return lm_scale3(a, 1.0f / b); } static inline lm_vec3 lm_pmod3 (lm_vec3 a, float b) { return lm_v3(lm_pmodf(a.x, b), lm_pmodf(a.y, b), lm_pmodf(a.z, b)); } static inline lm_vec3 lm_min3 (lm_vec3 a, lm_vec3 b) { return lm_v3(lm_minf(a.x, b.x), lm_minf(a.y, b.y), lm_minf(a.z, b.z)); } static inline lm_vec3 lm_max3 (lm_vec3 a, lm_vec3 b) { return lm_v3(lm_maxf(a.x, b.x), lm_maxf(a.y, b.y), lm_maxf(a.z, b.z)); } static inline lm_vec3 lm_abs3 (lm_vec3 a ) { return lm_v3(lm_absf(a.x), lm_absf(a.y), lm_absf(a.z)); } static inline lm_vec3 lm_floor3 (lm_vec3 a ) { return lm_v3(floorf(a.x), floorf(a.y), floorf(a.z)); } static inline lm_vec3 lm_ceil3 (lm_vec3 a ) { return lm_v3(ceilf (a.x), ceilf (a.y), ceilf (a.z)); } static inline float lm_dot3 (lm_vec3 a, lm_vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; } static inline lm_vec3 lm_cross3 (lm_vec3 a, lm_vec3 b) { return lm_v3(a.y * b.z - b.y * a.z, a.z * b.x - b.z * a.x, a.x * b.y - b.x * a.y); } static inline float lm_length3sq (lm_vec3 a ) { return a.x * a.x + a.y * a.y + a.z * a.z; } static inline float lm_length3 (lm_vec3 a ) { return sqrtf(lm_length3sq(a)); } static inline lm_vec3 lm_normalize3(lm_vec3 a ) { return lm_div3(a, lm_length3(a)); } static inline lm_bool lm_finite3 (lm_vec3 a ) { return lm_finite(a.x) && lm_finite(a.y) && lm_finite(a.z); } static lm_vec2 lm_toBarycentric(lm_vec2 p1, lm_vec2 p2, lm_vec2 p3, lm_vec2 p) { // http://www.blackpawn.com/texts/pointinpoly/ // Compute vectors lm_vec2 v0 = lm_sub2(p3, p1); lm_vec2 v1 = lm_sub2(p2, p1); lm_vec2 v2 = lm_sub2(p, p1); // Compute dot products float dot00 = lm_dot2(v0, v0); float dot01 = lm_dot2(v0, v1); float dot02 = lm_dot2(v0, v2); float dot11 = lm_dot2(v1, v1); float dot12 = lm_dot2(v1, v2); // Compute barycentric coordinates float invDenom = 1.0f / (dot00 * dot11 - dot01 * dot01); float u = (dot11 * dot02 - dot01 * dot12) * invDenom; float v = (dot00 * dot12 - dot01 * dot02) * invDenom; return lm_v2(u, v); } static inline int lm_leftOf(lm_vec2 a, lm_vec2 b, lm_vec2 c) { float x = lm_cross2(lm_sub2(b, a), lm_sub2(c, b)); return x < 0 ? -1 : x > 0; } static lm_bool lm_lineIntersection(lm_vec2 x0, lm_vec2 x1, lm_vec2 y0, lm_vec2 y1, lm_vec2* res) { lm_vec2 dx = lm_sub2(x1, x0); lm_vec2 dy = lm_sub2(y1, y0); lm_vec2 d = lm_sub2(x0, y0); float dyx = lm_cross2(dy, dx); if (dyx == 0.0f) return LM_FALSE; dyx = lm_cross2(d, dx) / dyx; if (dyx <= 0 || dyx >= 1) return LM_FALSE; res->x = y0.x + dyx * dy.x; res->y = y0.y + dyx * dy.y; return LM_TRUE; } // this modifies the poly array! the poly array must be big enough to hold the result! // res must be big enough to hold the result! static int lm_convexClip(lm_vec2 *poly, int nPoly, const lm_vec2 *clip, int nClip, lm_vec2 *res) { int nRes = nPoly; int dir = lm_leftOf(clip[0], clip[1], clip[2]); for (int i = 0, j = nClip - 1; i < nClip && nRes; j = i++) { if (i != 0) for (nPoly = 0; nPoly < nRes; nPoly++) poly[nPoly] = res[nPoly]; nRes = 0; lm_vec2 v0 = poly[nPoly - 1]; int side0 = lm_leftOf(clip[j], clip[i], v0); if (side0 != -dir) res[nRes++] = v0; for (int k = 0; k < nPoly; k++) { lm_vec2 v1 = poly[k], x; int side1 = lm_leftOf(clip[j], clip[i], v1); if (side0 + side1 == 0 && side0 && lm_lineIntersection(clip[j], clip[i], v0, v1, &x)) res[nRes++] = x; if (k == nPoly - 1) break; if (side1 != -dir) res[nRes++] = v1; v0 = v1; side0 = side1; } } return nRes; } struct lm_context { struct { const float *modelMatrix; float normalMatrix[9]; const unsigned char *positions; lm_type positionsType; int positionsStride; const unsigned char *normals; lm_type normalsType; int normalsStride; const unsigned char *uvs; lm_type uvsType; int uvsStride; const unsigned char *indices; lm_type indicesType; unsigned int count; } mesh; struct { int pass; int passCount; struct { unsigned int baseIndex; lm_vec3 p[3]; lm_vec3 n[3]; lm_vec2 uv[3]; } triangle; struct { int minx, miny; int maxx, maxy; int x, y; } rasterizer; struct { lm_vec3 position; lm_vec3 direction; lm_vec3 up; } sample; struct { int side; } hemisphere; } meshPosition; struct { int width; int height; int channels; float *data; #ifdef LM_DEBUG_INTERPOLATION unsigned char *debug; #endif } lightmap; struct { unsigned int size; float zNear, zFar; float cameraToSurfaceDistanceModifier; struct { float r, g, b; } clearColor; unsigned int fbHemiCountX; unsigned int fbHemiCountY; unsigned int fbHemiIndex; lm_ivec2 *fbHemiToLightmapLocation; GLuint fbTexture[2]; GLuint fb[2]; GLuint fbDepth; GLuint vao; struct { GLuint programID; GLuint hemispheresTextureID; GLuint weightsTextureID; GLuint weightsTexture; } firstPass; struct { GLuint programID; GLuint hemispheresTextureID; } downsamplePass; struct { GLuint texture; lm_ivec2 writePosition; int width, height; lm_ivec2 *toLightmapLocation; } storage; } hemisphere; float interpolationThreshold; }; // pass order of one 4x4 interpolation patch for two interpolation steps (and the next neighbors right of/below it) // 0 4 1 4 0 // 5 6 5 6 5 // 2 4 3 4 2 // 5 6 5 6 5 // 0 4 1 4 0 static unsigned int lm_passStepSize(lm_context *ctx) { unsigned int shift = ctx->meshPosition.passCount / 3 - (ctx->meshPosition.pass - 1) / 3; unsigned int step = (1 << shift); assert(step > 0); return step; } static unsigned int lm_passOffsetX(lm_context *ctx) { if (!ctx->meshPosition.pass) return 0; int passType = (ctx->meshPosition.pass - 1) % 3; unsigned int halfStep = lm_passStepSize(ctx) >> 1; return passType != 1 ? halfStep : 0; } static unsigned int lm_passOffsetY(lm_context *ctx) { if (!ctx->meshPosition.pass) return 0; int passType = (ctx->meshPosition.pass - 1) % 3; unsigned int halfStep = lm_passStepSize(ctx) >> 1; return passType != 0 ? halfStep : 0; } static lm_bool lm_hasConservativeTriangleRasterizerFinished(lm_context *ctx) { return ctx->meshPosition.rasterizer.y >= ctx->meshPosition.rasterizer.maxy; } static void lm_moveToNextPotentialConservativeTriangleRasterizerPosition(lm_context *ctx) { unsigned int step = lm_passStepSize(ctx); ctx->meshPosition.rasterizer.x += step; while (ctx->meshPosition.rasterizer.x >= ctx->meshPosition.rasterizer.maxx) { ctx->meshPosition.rasterizer.x = ctx->meshPosition.rasterizer.minx + lm_passOffsetX(ctx); ctx->meshPosition.rasterizer.y += step; if (lm_hasConservativeTriangleRasterizerFinished(ctx)) break; } } static float *lm_getLightmapPixel(lm_context *ctx, int x, int y) { assert(x >= 0 && x < ctx->lightmap.width && y >= 0 && y < ctx->lightmap.height); return ctx->lightmap.data + (y * ctx->lightmap.width + x) * ctx->lightmap.channels; } static void lm_setLightmapPixel(lm_context *ctx, int x, int y, float *in) { assert(x >= 0 && x < ctx->lightmap.width && y >= 0 && y < ctx->lightmap.height); float *p = ctx->lightmap.data + (y * ctx->lightmap.width + x) * ctx->lightmap.channels; for (int j = 0; j < ctx->lightmap.channels; j++) *p++ = *in++; } #define lm_baseAngle 0.1f static const float lm_baseAngles[3][3] = { { lm_baseAngle, lm_baseAngle + 1.0f / 3.0f, lm_baseAngle + 2.0f / 3.0f }, { lm_baseAngle + 1.0f / 3.0f, lm_baseAngle + 2.0f / 3.0f, lm_baseAngle }, { lm_baseAngle + 2.0f / 3.0f, lm_baseAngle, lm_baseAngle + 1.0f / 3.0f } }; static lm_bool lm_trySamplingConservativeTriangleRasterizerPosition(lm_context *ctx) { if (lm_hasConservativeTriangleRasterizerFinished(ctx)) return LM_FALSE; // check if lightmap pixel was already set float *pixelValue = lm_getLightmapPixel(ctx, ctx->meshPosition.rasterizer.x, ctx->meshPosition.rasterizer.y); for (int j = 0; j < ctx->lightmap.channels; j++) if (pixelValue[j] != 0.0f) return LM_FALSE; // try calculating centroid by clipping the pixel against the triangle lm_vec2 pixel[16]; pixel[0] = lm_v2i(ctx->meshPosition.rasterizer.x, ctx->meshPosition.rasterizer.y); pixel[1] = lm_v2i(ctx->meshPosition.rasterizer.x + 1, ctx->meshPosition.rasterizer.y); pixel[2] = lm_v2i(ctx->meshPosition.rasterizer.x + 1, ctx->meshPosition.rasterizer.y + 1); pixel[3] = lm_v2i(ctx->meshPosition.rasterizer.x, ctx->meshPosition.rasterizer.y + 1); lm_vec2 res[16]; int nRes = lm_convexClip(pixel, 4, ctx->meshPosition.triangle.uv, 3, res); if (nRes == 0) return LM_FALSE; // nothing left // calculate centroid position and area lm_vec2 centroid = res[0]; float area = res[nRes - 1].x * res[0].y - res[nRes - 1].y * res[0].x; for (int i = 1; i < nRes; i++) { centroid = lm_add2(centroid, res[i]); area += res[i - 1].x * res[i].y - res[i - 1].y * res[i].x; } centroid = lm_div2(centroid, (float)nRes); area = lm_absf(area / 2.0f); if (area <= 0.0f) return LM_FALSE; // no area left // calculate barycentric coords lm_vec2 uv = lm_toBarycentric( ctx->meshPosition.triangle.uv[0], ctx->meshPosition.triangle.uv[1], ctx->meshPosition.triangle.uv[2], centroid); if (!lm_finite2(uv)) return LM_FALSE; // degenerate // try to interpolate color from neighbors: if (ctx->meshPosition.pass > 0) { float *neighbors[4]; int neighborCount = 0; int neighborsExpected = 0; int d = (int)lm_passStepSize(ctx) / 2; int dirs = ((ctx->meshPosition.pass - 1) % 3) + 1; if (dirs & 1) // check x-neighbors with distance d { neighborsExpected += 2; if (ctx->meshPosition.rasterizer.x - d >= ctx->meshPosition.rasterizer.minx && ctx->meshPosition.rasterizer.x + d <= ctx->meshPosition.rasterizer.maxx) { neighbors[neighborCount++] = lm_getLightmapPixel(ctx, ctx->meshPosition.rasterizer.x - d, ctx->meshPosition.rasterizer.y); neighbors[neighborCount++] = lm_getLightmapPixel(ctx, ctx->meshPosition.rasterizer.x + d, ctx->meshPosition.rasterizer.y); } } if (dirs & 2) // check y-neighbors with distance d { neighborsExpected += 2; if (ctx->meshPosition.rasterizer.y - d >= ctx->meshPosition.rasterizer.miny && ctx->meshPosition.rasterizer.y + d <= ctx->meshPosition.rasterizer.maxy) { neighbors[neighborCount++] = lm_getLightmapPixel(ctx, ctx->meshPosition.rasterizer.x, ctx->meshPosition.rasterizer.y - d); neighbors[neighborCount++] = lm_getLightmapPixel(ctx, ctx->meshPosition.rasterizer.x, ctx->meshPosition.rasterizer.y + d); } } if (neighborCount == neighborsExpected) // are all interpolation neighbors available? { // calculate average neighbor pixel value float avg[4] = { 0 }; for (int i = 0; i < neighborCount; i++) for (int j = 0; j < ctx->lightmap.channels; j++) avg[j] += neighbors[i][j]; float ni = 1.0f / neighborCount; for (int j = 0; j < ctx->lightmap.channels; j++) avg[j] *= ni; // check if error from average pixel to neighbors is above the interpolation threshold lm_bool interpolate = LM_TRUE; for (int i = 0; i < neighborCount; i++) { lm_bool zero = LM_TRUE; for (int j = 0; j < ctx->lightmap.channels; j++) { if (neighbors[i][j] != 0.0f) zero = LM_FALSE; if (fabs(neighbors[i][j] - avg[j]) > ctx->interpolationThreshold) interpolate = LM_FALSE; } if (zero) interpolate = LM_FALSE; if (!interpolate) break; } // set interpolated value and return if interpolation is acceptable if (interpolate) { lm_setLightmapPixel(ctx, ctx->meshPosition.rasterizer.x, ctx->meshPosition.rasterizer.y, avg); #ifdef LM_DEBUG_INTERPOLATION // set interpolated pixel to green in debug output ctx->lightmap.debug[(ctx->meshPosition.rasterizer.y * ctx->lightmap.width + ctx->meshPosition.rasterizer.x) * 3 + 1] = 255; #endif return LM_FALSE; } } } // could not interpolate. must render a hemisphere. // calculate 3D sample position and orientation lm_vec3 p0 = ctx->meshPosition.triangle.p[0]; lm_vec3 p1 = ctx->meshPosition.triangle.p[1]; lm_vec3 p2 = ctx->meshPosition.triangle.p[2]; lm_vec3 v1 = lm_sub3(p1, p0); lm_vec3 v2 = lm_sub3(p2, p0); ctx->meshPosition.sample.position = lm_add3(p0, lm_add3(lm_scale3(v2, uv.x), lm_scale3(v1, uv.y))); lm_vec3 n0 = ctx->meshPosition.triangle.n[0]; lm_vec3 n1 = ctx->meshPosition.triangle.n[1]; lm_vec3 n2 = ctx->meshPosition.triangle.n[2]; lm_vec3 nv1 = lm_sub3(n1, n0); lm_vec3 nv2 = lm_sub3(n2, n0); ctx->meshPosition.sample.direction = lm_normalize3(lm_add3(n0, lm_add3(lm_scale3(nv2, uv.x), lm_scale3(nv1, uv.y)))); ctx->meshPosition.sample.direction = lm_normalize3(ctx->meshPosition.sample.direction); float cameraToSurfaceDistance = (1.0f + ctx->hemisphere.cameraToSurfaceDistanceModifier) * ctx->hemisphere.zNear * sqrtf(2.0f); ctx->meshPosition.sample.position = lm_add3(ctx->meshPosition.sample.position, lm_scale3(ctx->meshPosition.sample.direction, cameraToSurfaceDistance)); if (!lm_finite3(ctx->meshPosition.sample.position) || !lm_finite3(ctx->meshPosition.sample.direction) || lm_length3sq(ctx->meshPosition.sample.direction) < 0.5f) // don't allow 0.0f. should always be ~1.0f return LM_FALSE; lm_vec3 up = lm_v3(0.0f, 1.0f, 0.0f); if (lm_absf(lm_dot3(up, ctx->meshPosition.sample.direction)) > 0.8f) up = lm_v3(0.0f, 0.0f, 1.0f); #if 0 // triangle-consistent up vector ctx->meshPosition.sample.up = lm_normalize3(lm_cross3(up, ctx->meshPosition.sample.direction)); return LM_TRUE; #else // "randomized" rotation with pattern lm_vec3 side = lm_normalize3(lm_cross3(up, ctx->meshPosition.sample.direction)); up = lm_normalize3(lm_cross3(side, ctx->meshPosition.sample.direction)); int rx = ctx->meshPosition.rasterizer.x % 3; int ry = ctx->meshPosition.rasterizer.y % 3; static const float lm_pi = 3.14159265358979f; float phi = 2.0f * lm_pi * lm_baseAngles[ry][rx] + 0.1f * ((float)rand() / (float)RAND_MAX); ctx->meshPosition.sample.up = lm_normalize3(lm_add3(lm_scale3(side, cosf(phi)), lm_scale3(up, sinf(phi)))); return LM_TRUE; #endif } // returns true if a sampling position was found and // false if we finished rasterizing the current triangle static lm_bool lm_findFirstConservativeTriangleRasterizerPosition(lm_context *ctx) { while (!lm_trySamplingConservativeTriangleRasterizerPosition(ctx)) { lm_moveToNextPotentialConservativeTriangleRasterizerPosition(ctx); if (lm_hasConservativeTriangleRasterizerFinished(ctx)) return LM_FALSE; } return LM_TRUE; } static lm_bool lm_findNextConservativeTriangleRasterizerPosition(lm_context *ctx) { lm_moveToNextPotentialConservativeTriangleRasterizerPosition(ctx); return lm_findFirstConservativeTriangleRasterizerPosition(ctx); } static void lm_writeResultsToLightmap(lm_context* ctx) { // do the GPU->CPU transfer of downsampled hemispheres float* hemi = (float*)LM_CALLOC(ctx->hemisphere.storage.width * ctx->hemisphere.storage.height, 4 * sizeof(float)); glBindTexture(GL_TEXTURE_2D, ctx->hemisphere.storage.texture); glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_FLOAT, hemi); // write results to lightmap texture for (int y = 0; y < ctx->hemisphere.storage.writePosition.y + (int)ctx->hemisphere.fbHemiCountY; y++) { for (int x = 0; x < ctx->hemisphere.storage.width; x++) { int index = y * ctx->hemisphere.storage.width + x; lm_ivec2 lmUV = ctx->hemisphere.storage.toLightmapLocation[index]; if (lmUV.x >= 0) { float* c = hemi + index * 4; float validity = c[3]; float* lm = ctx->lightmap.data + (lmUV.y * ctx->lightmap.width + lmUV.x) * ctx->lightmap.channels; if (!lm[0] && validity > 0.9) { float scale = 1.0f / validity; switch (ctx->lightmap.channels) { case 1: lm[0] = lm_maxf((c[0] + c[1] + c[2]) * scale / 3.0f, FLT_MIN); break; case 2: lm[0] = lm_maxf((c[0] + c[1] + c[2]) * scale / 3.0f, FLT_MIN); lm[1] = 1.0f; // do we want to support this format? break; case 3: lm[0] = lm_maxf(c[0] * scale, FLT_MIN); lm[1] = lm_maxf(c[1] * scale, FLT_MIN); lm[2] = lm_maxf(c[2] * scale, FLT_MIN); break; case 4: lm[0] = lm_maxf(c[0] * scale, FLT_MIN); lm[1] = lm_maxf(c[1] * scale, FLT_MIN); lm[2] = lm_maxf(c[2] * scale, FLT_MIN); lm[3] = 1.0f; break; default: assert(LM_FALSE); break; } #ifdef LM_DEBUG_INTERPOLATION // set sampled pixel to red in debug output ctx->lightmap.debug[(lmUV.y * ctx->lightmap.width + lmUV.x) * 3 + 0] = 255; #endif } } ctx->hemisphere.storage.toLightmapLocation[index].x = -1; // reset } } LM_FREE(hemi); ctx->hemisphere.storage.writePosition = lm_i2(0, 0); } static lm_bool lm_integrateHemisphereBatch(lm_context *ctx) { if (!ctx->hemisphere.fbHemiIndex) return LM_FALSE; // nothing to do glDisable(GL_DEPTH_TEST); glBindVertexArray(ctx->hemisphere.vao); int fbRead = 0; int fbWrite = 1; // weighted downsampling pass int outHemiSize = ctx->hemisphere.size / 2; glBindFramebuffer(GL_FRAMEBUFFER, ctx->hemisphere.fb[fbWrite]); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, ctx->hemisphere.fbTexture[fbWrite], 0); glViewport(0, 0, outHemiSize * ctx->hemisphere.fbHemiCountX, outHemiSize * ctx->hemisphere.fbHemiCountY); glUseProgram(ctx->hemisphere.firstPass.programID); glUniform1i(ctx->hemisphere.firstPass.hemispheresTextureID, 0); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, ctx->hemisphere.fbTexture[fbRead]); glUniform1i(ctx->hemisphere.firstPass.weightsTextureID, 1); glActiveTexture(GL_TEXTURE1); glBindTexture(GL_TEXTURE_2D, ctx->hemisphere.firstPass.weightsTexture); glActiveTexture(GL_TEXTURE0); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); //glBindTexture(GL_TEXTURE_2D, 0); #if 0 // debug output int w = outHemiSize * ctx->hemisphere.fbHemiCountX, h = outHemiSize * ctx->hemisphere.fbHemiCountY; glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glBindFramebuffer(GL_READ_FRAMEBUFFER, ctx->hemisphere.fb[fbWrite]); glReadBuffer(GL_COLOR_ATTACHMENT0); float *image = new float[3 * w * h]; glReadPixels(0, 0, w, h, GL_RGB, GL_FLOAT, image); lmImageSaveTGAf("firstpass.png", image, w, h, 3); delete[] image; #endif // downsampling passes glUseProgram(ctx->hemisphere.downsamplePass.programID); glUniform1i(ctx->hemisphere.downsamplePass.hemispheresTextureID, 0); while (outHemiSize > 1) { LM_SWAP(int, fbRead, fbWrite); outHemiSize /= 2; glBindFramebuffer(GL_FRAMEBUFFER, ctx->hemisphere.fb[fbWrite]); glViewport(0, 0, outHemiSize * ctx->hemisphere.fbHemiCountX, outHemiSize * ctx->hemisphere.fbHemiCountY); glBindTexture(GL_TEXTURE_2D, ctx->hemisphere.fbTexture[fbRead]); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); //glBindTexture(GL_TEXTURE_2D, 0); } // copy results to storage texture glBindTexture(GL_TEXTURE_2D, ctx->hemisphere.storage.texture); glCopyTexSubImage2D(GL_TEXTURE_2D, 0, ctx->hemisphere.storage.writePosition.x, ctx->hemisphere.storage.writePosition.y, 0, 0, ctx->hemisphere.fbHemiCountX, ctx->hemisphere.fbHemiCountY); glBindTexture(GL_TEXTURE_2D, 0); glBindFramebuffer(GL_FRAMEBUFFER, 0); glBindVertexArray(0); glEnable(GL_DEPTH_TEST); // copy position mapping to storage for (unsigned int y = 0; y < ctx->hemisphere.fbHemiCountY; y++) { int sy = ctx->hemisphere.storage.writePosition.y + y; for (unsigned int x = 0; x < ctx->hemisphere.fbHemiCountX; x++) { int sx = ctx->hemisphere.storage.writePosition.x + x; unsigned int hemiIndex = y * ctx->hemisphere.fbHemiCountX + x; ctx->hemisphere.storage.toLightmapLocation[sy * ctx->hemisphere.storage.width + sx] = (hemiIndex >= ctx->hemisphere.fbHemiIndex) ? lm_i2(-1, -1) : ctx->hemisphere.fbHemiToLightmapLocation[hemiIndex]; } } lm_bool needWrite = LM_TRUE; // advance storage texture write position ctx->hemisphere.storage.writePosition.x += ctx->hemisphere.fbHemiCountX; if (ctx->hemisphere.storage.writePosition.x + (int)ctx->hemisphere.fbHemiCountX > ctx->hemisphere.storage.width) { ctx->hemisphere.storage.writePosition.x = 0; // storage is full if (ctx->hemisphere.storage.writePosition.y + (int)ctx->hemisphere.fbHemiCountY >= ctx->hemisphere.storage.height) { lm_writeResultsToLightmap(ctx); // read storage data from gpu memory and write it to the lightmap needWrite = LM_FALSE; } else { ctx->hemisphere.storage.writePosition.y += ctx->hemisphere.fbHemiCountY; } } ctx->hemisphere.fbHemiIndex = 0; return needWrite; } static void lm_setView( int* viewport, int x, int y, int w, int h, float* view, lm_vec3 pos, lm_vec3 dir, lm_vec3 up, float* proj, float l, float r, float b, float t, float n, float f) { // viewport viewport[0] = x; viewport[1] = y; viewport[2] = w; viewport[3] = h; // view matrix: lookAt(pos, pos + dir, up) lm_vec3 side = lm_cross3(dir, up); //up = cross(side, dir); dir = lm_negate3(dir); pos = lm_negate3(pos); view[ 0] = side.x; view[ 1] = up.x; view[ 2] = dir.x; view[ 3] = 0.0f; view[ 4] = side.y; view[ 5] = up.y; view[ 6] = dir.y; view[ 7] = 0.0f; view[ 8] = side.z; view[ 9] = up.z; view[10] = dir.z; view[11] = 0.0f; view[12] = lm_dot3(side, pos); view[13] = lm_dot3(up, pos); view[14] = lm_dot3(dir, pos); view[15] = 1.0f; // projection matrix: frustum(l, r, b, t, n, f) float ilr = 1.0f / (r - l), ibt = 1.0f / (t - b), ninf = -1.0f / (f - n), n2 = 2.0f * n; proj[ 0] = n2 * ilr; proj[ 1] = 0.0f; proj[ 2] = 0.0f; proj[ 3] = 0.0f; proj[ 4] = 0.0f; proj[ 5] = n2 * ibt; proj[ 6] = 0.0f; proj[ 7] = 0.0f; proj[ 8] = (r + l) * ilr; proj[ 9] = (t + b) * ibt; proj[10] = (f + n) * ninf; proj[11] = -1.0f; proj[12] = 0.0f; proj[13] = 0.0f; proj[14] = f * n2 * ninf; proj[15] = 0.0f; } // returns true if a hemisphere side was prepared for rendering and // false if we finished the current hemisphere static lm_bool lm_beginSampleHemisphere(lm_context *ctx, int* viewport, float* view, float* proj) { if (ctx->meshPosition.hemisphere.side >= 5) return LM_FALSE; if (ctx->meshPosition.hemisphere.side == 0) { // prepare hemisphere glBindFramebuffer(GL_FRAMEBUFFER, ctx->hemisphere.fb[0]); if (ctx->hemisphere.fbHemiIndex == 0) { // prepare hemisphere batch glClearColor( // clear to valid background pixels! ctx->hemisphere.clearColor.r, ctx->hemisphere.clearColor.g, ctx->hemisphere.clearColor.b, 1.0f); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); } ctx->hemisphere.fbHemiToLightmapLocation[ctx->hemisphere.fbHemiIndex] = lm_i2(ctx->meshPosition.rasterizer.x, ctx->meshPosition.rasterizer.y); } // find the target position in the batch int x = (ctx->hemisphere.fbHemiIndex % ctx->hemisphere.fbHemiCountX) * ctx->hemisphere.size * 3; int y = (ctx->hemisphere.fbHemiIndex / ctx->hemisphere.fbHemiCountX) * ctx->hemisphere.size; int size = ctx->hemisphere.size; float zNear = ctx->hemisphere.zNear; float zFar = ctx->hemisphere.zFar; lm_vec3 pos = ctx->meshPosition.sample.position; lm_vec3 dir = ctx->meshPosition.sample.direction; lm_vec3 up = ctx->meshPosition.sample.up; lm_vec3 right = lm_cross3(dir, up); // find the view parameters of the hemisphere side that we will render next // hemisphere layout in the framebuffer: // +-------+---+---+-------+ // | | | | D | // | C | R | L +-------+ // | | | | U | // +-------+---+---+-------+ switch (ctx->meshPosition.hemisphere.side) { case 0: // center lm_setView(viewport, x, y, size, size, view, pos, dir, up, proj, -zNear, zNear, -zNear, zNear, zNear, zFar); break; case 1: // right lm_setView(viewport, size + x, y, size / 2, size, view, pos, right, up, proj, -zNear, 0.0f, -zNear, zNear, zNear, zFar); break; case 2: // left lm_setView(viewport, size + x + size / 2, y, size / 2, size, view, pos, lm_negate3(right), up, proj, 0.0f, zNear, -zNear, zNear, zNear, zFar); break; case 3: // down lm_setView(viewport, 2 * size + x, y + size / 2, size, size / 2, view, pos, lm_negate3(up), dir, proj, -zNear, zNear, 0.0f, zNear, zNear, zFar); break; case 4: // up lm_setView(viewport, 2 * size + x, y, size, size / 2, view, pos, up, lm_negate3(dir), proj, -zNear, zNear, -zNear, 0.0f, zNear, zFar); break; default: assert(LM_FALSE); break; } return LM_TRUE; } static void lm_endSampleHemisphere(lm_context *ctx) { if (++ctx->meshPosition.hemisphere.side == 5) { // finish hemisphere glBindFramebuffer(GL_FRAMEBUFFER, 0); if (++ctx->hemisphere.fbHemiIndex == ctx->hemisphere.fbHemiCountX * ctx->hemisphere.fbHemiCountY) { // downsample new hemisphere batch and store the results lm_integrateHemisphereBatch(ctx); } } } static void lm_inverseTranspose(const float *m44, float *n33) { if (!m44) { n33[0] = 1.0f; n33[1] = 0.0f; n33[2] = 0.0f; n33[3] = 0.0f; n33[4] = 1.0f; n33[5] = 0.0f; n33[6] = 0.0f; n33[7] = 0.0f; n33[8] = 1.0f; return; } float determinant = m44[ 0] * (m44[ 5] * m44[10] - m44[ 9] * m44[ 6]) - m44[ 1] * (m44[ 4] * m44[10] - m44[ 6] * m44[ 8]) + m44[ 2] * (m44[ 4] * m44[ 9] - m44[ 5] * m44[ 8]); assert(fabs(determinant) > FLT_EPSILON); float rcpDeterminant = 1.0f / determinant; n33[0] = (m44[ 5] * m44[10] - m44[ 9] * m44[ 6]) * rcpDeterminant; n33[3] = -(m44[ 1] * m44[10] - m44[ 2] * m44[ 9]) * rcpDeterminant; n33[6] = (m44[ 1] * m44[ 6] - m44[ 2] * m44[ 5]) * rcpDeterminant; n33[1] = -(m44[ 4] * m44[10] - m44[ 6] * m44[ 8]) * rcpDeterminant; n33[4] = (m44[ 0] * m44[10] - m44[ 2] * m44[ 8]) * rcpDeterminant; n33[7] = -(m44[ 0] * m44[ 6] - m44[ 4] * m44[ 2]) * rcpDeterminant; n33[2] = (m44[ 4] * m44[ 9] - m44[ 8] * m44[ 5]) * rcpDeterminant; n33[5] = -(m44[ 0] * m44[ 9] - m44[ 8] * m44[ 1]) * rcpDeterminant; n33[8] = (m44[ 0] * m44[ 5] - m44[ 4] * m44[ 1]) * rcpDeterminant; } static lm_vec3 lm_transformNormal(const float *m, lm_vec3 n) { lm_vec3 r; r.x = m[0] * n.x + m[3] * n.y + m[6] * n.z; r.y = m[1] * n.x + m[4] * n.y + m[7] * n.z; r.z = m[2] * n.x + m[5] * n.y + m[8] * n.z; return r; } static lm_vec3 lm_transformPosition(const float *m, lm_vec3 v) { if (!m) return v; lm_vec3 r; r.x = m[0] * v.x + m[4] * v.y + m[ 8] * v.z + m[12]; r.y = m[1] * v.x + m[5] * v.y + m[ 9] * v.z + m[13]; r.z = m[2] * v.x + m[6] * v.y + m[10] * v.z + m[14]; float d = m[3] * v.x + m[7] * v.y + m[11] * v.z + m[15]; assert(lm_absf(d - 1.0f) < 0.00001f); // could divide by d, but this shouldn't be a projection transform! return r; } static void lm_setMeshPosition(lm_context *ctx, unsigned int indicesTriangleBaseIndex) { // fetch triangle at the specified indicesTriangleBaseIndex ctx->meshPosition.triangle.baseIndex = indicesTriangleBaseIndex; // load and transform triangle to process next lm_vec2 uvMin = lm_v2(FLT_MAX, FLT_MAX), uvMax = lm_v2(-FLT_MAX, -FLT_MAX); lm_vec2 uvScale = lm_v2i(ctx->lightmap.width, ctx->lightmap.height); unsigned int vIndices[3]; for (int i = 0; i < 3; i++) { // decode index unsigned int vIndex; switch (ctx->mesh.indicesType) { case LM_NONE: vIndex = ctx->meshPosition.triangle.baseIndex + i; break; case LM_UNSIGNED_BYTE: vIndex = ((const unsigned char*)ctx->mesh.indices + ctx->meshPosition.triangle.baseIndex)[i]; break; case LM_UNSIGNED_SHORT: vIndex = ((const unsigned short*)ctx->mesh.indices + ctx->meshPosition.triangle.baseIndex)[i]; break; case LM_UNSIGNED_INT: vIndex = ((const unsigned int*)ctx->mesh.indices + ctx->meshPosition.triangle.baseIndex)[i]; break; default: assert(LM_FALSE); break; } vIndices[i] = vIndex; // decode and pre-transform vertex position const void *pPtr = ctx->mesh.positions + vIndex * ctx->mesh.positionsStride; lm_vec3 p; switch (ctx->mesh.positionsType) { // TODO: signed formats case LM_UNSIGNED_BYTE: { const unsigned char *uc = (const unsigned char*)pPtr; p = lm_v3(uc[0], uc[1], uc[2]); } break; case LM_UNSIGNED_SHORT: { const unsigned short *us = (const unsigned short*)pPtr; p = lm_v3(us[0], us[1], us[2]); } break; case LM_UNSIGNED_INT: { const unsigned int *ui = (const unsigned int*)pPtr; p = lm_v3((float)ui[0], (float)ui[1], (float)ui[2]); } break; case LM_FLOAT: { p = *(const lm_vec3*)pPtr; } break; default: { assert(LM_FALSE); } break; } ctx->meshPosition.triangle.p[i] = lm_transformPosition(ctx->mesh.modelMatrix, p); // decode and scale (to lightmap resolution) vertex lightmap texture coords const void *uvPtr = ctx->mesh.uvs + vIndex * ctx->mesh.uvsStride; lm_vec2 uv; switch (ctx->mesh.uvsType) { case LM_UNSIGNED_BYTE: { const unsigned char *uc = (const unsigned char*)uvPtr; uv = lm_v2(uc[0] / (float)UCHAR_MAX, uc[1] / (float)UCHAR_MAX); } break; case LM_UNSIGNED_SHORT: { const unsigned short *us = (const unsigned short*)uvPtr; uv = lm_v2(us[0] / (float)USHRT_MAX, us[1] / (float)USHRT_MAX); } break; case LM_UNSIGNED_INT: { const unsigned int *ui = (const unsigned int*)uvPtr; uv = lm_v2(ui[0] / (float)UINT_MAX, ui[1] / (float)UINT_MAX); } break; case LM_FLOAT: { uv = *(const lm_vec2*)uvPtr; } break; default: { assert(LM_FALSE); } break; } ctx->meshPosition.triangle.uv[i] = lm_mul2(lm_pmod2(uv, 1.0f), uvScale); // maybe clamp to 0.0-1.0 instead of pmod? // update bounds on lightmap uvMin = lm_min2(uvMin, ctx->meshPosition.triangle.uv[i]); uvMax = lm_max2(uvMax, ctx->meshPosition.triangle.uv[i]); } lm_vec3 flatNormal = lm_cross3( lm_sub3(ctx->meshPosition.triangle.p[1], ctx->meshPosition.triangle.p[0]), lm_sub3(ctx->meshPosition.triangle.p[2], ctx->meshPosition.triangle.p[0])); for (int i = 0; i < 3; i++) { // decode and pre-transform vertex normal const void *nPtr = ctx->mesh.normals + vIndices[i] * ctx->mesh.normalsStride; lm_vec3 n; switch (ctx->mesh.normalsType) { // TODO: signed formats case LM_FLOAT: { n = *(const lm_vec3*)nPtr; n = lm_normalize3(lm_transformNormal(ctx->mesh.normalMatrix, n)); } break; case LM_NONE: { n = flatNormal; } break; default: { assert(LM_FALSE); } break; } ctx->meshPosition.triangle.n[i] = n; } // calculate area of interest (on lightmap) for conservative rasterization lm_vec2 bbMin = lm_floor2(uvMin); lm_vec2 bbMax = lm_ceil2 (uvMax); ctx->meshPosition.rasterizer.minx = lm_maxi((int)bbMin.x - 1, 0); ctx->meshPosition.rasterizer.miny = lm_maxi((int)bbMin.y - 1, 0); ctx->meshPosition.rasterizer.maxx = lm_mini((int)bbMax.x + 1, ctx->lightmap.width - 1); ctx->meshPosition.rasterizer.maxy = lm_mini((int)bbMax.y + 1, ctx->lightmap.height - 1); assert(ctx->meshPosition.rasterizer.minx <= ctx->meshPosition.rasterizer.maxx && ctx->meshPosition.rasterizer.miny <= ctx->meshPosition.rasterizer.maxy); ctx->meshPosition.rasterizer.x = ctx->meshPosition.rasterizer.minx + lm_passOffsetX(ctx); ctx->meshPosition.rasterizer.y = ctx->meshPosition.rasterizer.miny + lm_passOffsetY(ctx); // try moving to first valid sample position if (ctx->meshPosition.rasterizer.x <= ctx->meshPosition.rasterizer.maxx && ctx->meshPosition.rasterizer.y <= ctx->meshPosition.rasterizer.maxy && lm_findFirstConservativeTriangleRasterizerPosition(ctx)) ctx->meshPosition.hemisphere.side = 0; // we can start sampling the hemisphere else ctx->meshPosition.hemisphere.side = 5; // no samples on this triangle! put hemisphere sampler into finished state } static GLuint lm_LoadShader(GLenum type, const char *source) { GLuint shader = glCreateShader(type); if (shader == 0) { fprintf(stderr, "Could not create shader!\n"); return 0; } glShaderSource(shader, 1, &source, NULL); glCompileShader(shader); GLint compiled; glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled); if (!compiled) { fprintf(stderr, "Could not compile shader!\n"); GLint infoLen = 0; glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLen); if (infoLen) { char* infoLog = (char*)malloc(infoLen); glGetShaderInfoLog(shader, infoLen, NULL, infoLog); fprintf(stderr, "%s\n", infoLog); free(infoLog); } glDeleteShader(shader); return 0; } return shader; } static GLuint lm_LoadProgram(const char *vp, const char *fp) { GLuint program = glCreateProgram(); if (program == 0) { fprintf(stderr, "Could not create program!\n"); return 0; } GLuint vertexShader = lm_LoadShader(GL_VERTEX_SHADER, vp); GLuint fragmentShader = lm_LoadShader(GL_FRAGMENT_SHADER, fp); glAttachShader(program, vertexShader); glAttachShader(program, fragmentShader); glLinkProgram(program); glDeleteShader(vertexShader); glDeleteShader(fragmentShader); GLint linked; glGetProgramiv(program, GL_LINK_STATUS, &linked); if (!linked) { fprintf(stderr, "Could not link program!\n"); GLint infoLen = 0; glGetProgramiv(program, GL_INFO_LOG_LENGTH, &infoLen); if (infoLen) { char* infoLog = (char*)malloc(sizeof(char) * infoLen); glGetProgramInfoLog(program, infoLen, NULL, infoLog); fprintf(stderr, "%s\n", infoLog); free(infoLog); } glDeleteProgram(program); return 0; } return program; } static float lm_defaultWeights(float cos_theta, void *userdata) { return 1.0f; } lm_context *lmCreate(int hemisphereSize, float zNear, float zFar, float clearR, float clearG, float clearB, int interpolationPasses, float interpolationThreshold, float cameraToSurfaceDistanceModifier) { assert(hemisphereSize == 512 || hemisphereSize == 256 || hemisphereSize == 128 || hemisphereSize == 64 || hemisphereSize == 32 || hemisphereSize == 16); assert(zNear < zFar && zNear > 0.0f); assert(cameraToSurfaceDistanceModifier >= -1.0f); assert(interpolationPasses >= 0 && interpolationPasses <= 8); assert(interpolationThreshold >= 0.0f); lm_context *ctx = (lm_context*)LM_CALLOC(1, sizeof(lm_context)); ctx->meshPosition.passCount = 1 + 3 * interpolationPasses; ctx->interpolationThreshold = interpolationThreshold; ctx->hemisphere.size = hemisphereSize; ctx->hemisphere.zNear = zNear; ctx->hemisphere.zFar = zFar; ctx->hemisphere.cameraToSurfaceDistanceModifier = cameraToSurfaceDistanceModifier; ctx->hemisphere.clearColor.r = clearR; ctx->hemisphere.clearColor.g = clearG; ctx->hemisphere.clearColor.b = clearB; // calculate hemisphere batch size ctx->hemisphere.fbHemiCountX = 1536 / (3 * ctx->hemisphere.size); ctx->hemisphere.fbHemiCountY = 512 / ctx->hemisphere.size; // hemisphere batch framebuffers unsigned int w[] = { ctx->hemisphere.fbHemiCountX * ctx->hemisphere.size * 3, ctx->hemisphere.fbHemiCountX * ctx->hemisphere.size / 2 }; unsigned int h[] = { ctx->hemisphere.fbHemiCountY * ctx->hemisphere.size, ctx->hemisphere.fbHemiCountY * ctx->hemisphere.size / 2 }; glGenTextures(2, ctx->hemisphere.fbTexture); glGenFramebuffers(2, ctx->hemisphere.fb); glGenRenderbuffers(1, &ctx->hemisphere.fbDepth); glBindRenderbuffer(GL_RENDERBUFFER, ctx->hemisphere.fbDepth); glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, w[0], h[0]); glBindFramebuffer(GL_FRAMEBUFFER, ctx->hemisphere.fb[0]); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, ctx->hemisphere.fbDepth); for (int i = 0; i < 2; i++) { glBindTexture(GL_TEXTURE_2D, ctx->hemisphere.fbTexture[i]); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, w[i], h[i], 0, GL_RGBA, GL_FLOAT, 0); glBindFramebuffer(GL_FRAMEBUFFER, ctx->hemisphere.fb[i]); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, ctx->hemisphere.fbTexture[i], 0); GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); if (status != GL_FRAMEBUFFER_COMPLETE) { fprintf(stderr, "Could not create framebuffer!\n"); glDeleteRenderbuffers(1, &ctx->hemisphere.fbDepth); glDeleteFramebuffers(2, ctx->hemisphere.fb); glDeleteTextures(2, ctx->hemisphere.fbTexture); LM_FREE(ctx); return NULL; } } glBindFramebuffer(GL_FRAMEBUFFER, 0); // dummy vao for fullscreen quad rendering glGenVertexArrays(1, &ctx->hemisphere.vao); // hemisphere shader (weighted downsampling of the 3x1 hemisphere layout to a 0.5x0.5 square) { const char *vs = "#version 150 core\n" "const vec2 ps[4] = vec2[](vec2(1, -1), vec2(1, 1), vec2(-1, -1), vec2(-1, 1));\n" "void main()\n" "{\n" "gl_Position = vec4(ps[gl_VertexID], 0, 1);\n" "}\n"; const char *fs = "#version 150 core\n" "uniform sampler2D hemispheres;\n" "uniform sampler2D weights;\n" "layout(pixel_center_integer) in vec4 gl_FragCoord;\n" // whole integer values represent pixel centers, GL_ARB_fragment_coord_conventions "out vec4 outColor;\n" "vec4 weightedSample(ivec2 h_uv, ivec2 w_uv, ivec2 quadrant)\n" "{\n" "vec4 sample = texelFetch(hemispheres, h_uv + quadrant, 0);\n" "vec2 weight = texelFetch(weights, w_uv + quadrant, 0).rg;\n" "return vec4(sample.rgb * weight.r, sample.a * weight.g);\n" "}\n" "vec4 threeWeightedSamples(ivec2 h_uv, ivec2 w_uv, ivec2 offset)\n" "{\n" // horizontal triple sum "vec4 sum = weightedSample(h_uv, w_uv, offset);\n" "sum += weightedSample(h_uv, w_uv, offset + ivec2(2, 0));\n" "sum += weightedSample(h_uv, w_uv, offset + ivec2(4, 0));\n" "return sum;\n" "}\n" "void main()\n" "{\n" // this is a weighted sum downsampling pass (alpha component contains the weighted valid sample count) "vec2 in_uv = gl_FragCoord.xy * vec2(6.0, 2.0) + vec2(0.5);\n" "ivec2 h_uv = ivec2(in_uv);\n" "ivec2 w_uv = ivec2(mod(in_uv, vec2(textureSize(weights, 0))));\n" // there's no integer modulo :( "vec4 lb = threeWeightedSamples(h_uv, w_uv, ivec2(0, 0));\n" "vec4 rb = threeWeightedSamples(h_uv, w_uv, ivec2(1, 0));\n" "vec4 lt = threeWeightedSamples(h_uv, w_uv, ivec2(0, 1));\n" "vec4 rt = threeWeightedSamples(h_uv, w_uv, ivec2(1, 1));\n" "outColor = lb + rb + lt + rt;\n" "}\n"; ctx->hemisphere.firstPass.programID = lm_LoadProgram(vs, fs); if (!ctx->hemisphere.firstPass.programID) { fprintf(stderr, "Error loading the hemisphere first pass shader program... leaving!\n"); glDeleteVertexArrays(1, &ctx->hemisphere.vao); glDeleteRenderbuffers(1, &ctx->hemisphere.fbDepth); glDeleteFramebuffers(2, ctx->hemisphere.fb); glDeleteTextures(2, ctx->hemisphere.fbTexture); LM_FREE(ctx); return NULL; } ctx->hemisphere.firstPass.hemispheresTextureID = glGetUniformLocation(ctx->hemisphere.firstPass.programID, "hemispheres"); ctx->hemisphere.firstPass.weightsTextureID = glGetUniformLocation(ctx->hemisphere.firstPass.programID, "weights"); } // downsample shader { const char *vs = "#version 150 core\n" "const vec2 ps[4] = vec2[](vec2(1, -1), vec2(1, 1), vec2(-1, -1), vec2(-1, 1));\n" "void main()\n" "{\n" "gl_Position = vec4(ps[gl_VertexID], 0, 1);\n" "}\n"; const char *fs = "#version 150 core\n" "uniform sampler2D hemispheres;\n" "layout(pixel_center_integer) in vec4 gl_FragCoord;\n" // whole integer values represent pixel centers, GL_ARB_fragment_coord_conventions "out vec4 outColor;\n" "void main()\n" "{\n" // this is a sum downsampling pass (alpha component contains the weighted valid sample count) "ivec2 h_uv = ivec2(gl_FragCoord.xy) * 2;\n" "vec4 lb = texelFetch(hemispheres, h_uv + ivec2(0, 0), 0);\n" "vec4 rb = texelFetch(hemispheres, h_uv + ivec2(1, 0), 0);\n" "vec4 lt = texelFetch(hemispheres, h_uv + ivec2(0, 1), 0);\n" "vec4 rt = texelFetch(hemispheres, h_uv + ivec2(1, 1), 0);\n" "outColor = lb + rb + lt + rt;\n" "}\n"; ctx->hemisphere.downsamplePass.programID = lm_LoadProgram(vs, fs); if (!ctx->hemisphere.downsamplePass.programID) { fprintf(stderr, "Error loading the hemisphere downsample pass shader program... leaving!\n"); glDeleteProgram(ctx->hemisphere.firstPass.programID); glDeleteVertexArrays(1, &ctx->hemisphere.vao); glDeleteRenderbuffers(1, &ctx->hemisphere.fbDepth); glDeleteFramebuffers(2, ctx->hemisphere.fb); glDeleteTextures(2, ctx->hemisphere.fbTexture); LM_FREE(ctx); return NULL; } ctx->hemisphere.downsamplePass.hemispheresTextureID = glGetUniformLocation(ctx->hemisphere.downsamplePass.programID, "hemispheres"); } // hemisphere weights texture glGenTextures(1, &ctx->hemisphere.firstPass.weightsTexture); lmSetHemisphereWeights(ctx, lm_defaultWeights, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); // allocate batchPosition-to-lightmapPosition map ctx->hemisphere.fbHemiToLightmapLocation = (lm_ivec2*)LM_CALLOC(ctx->hemisphere.fbHemiCountX * ctx->hemisphere.fbHemiCountY, sizeof(lm_ivec2)); return ctx; } void lmDestroy(lm_context *ctx) { // reset state glUseProgram(0); glBindTexture(GL_TEXTURE_2D, 0); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glBindVertexArray(0); glBindFramebuffer(GL_FRAMEBUFFER, 0); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); // delete gl objects glDeleteTextures(1, &ctx->hemisphere.firstPass.weightsTexture); glDeleteTextures(1, &ctx->hemisphere.storage.texture); glDeleteProgram(ctx->hemisphere.downsamplePass.programID); glDeleteProgram(ctx->hemisphere.firstPass.programID); glDeleteVertexArrays(1, &ctx->hemisphere.vao); glDeleteRenderbuffers(1, &ctx->hemisphere.fbDepth); glDeleteFramebuffers(2, ctx->hemisphere.fb); glDeleteTextures(2, ctx->hemisphere.fbTexture); glDeleteTextures(1, &ctx->hemisphere.storage.texture); // free memory LM_FREE(ctx->hemisphere.storage.toLightmapLocation); LM_FREE(ctx->hemisphere.fbHemiToLightmapLocation); #ifdef LM_DEBUG_INTERPOLATION LM_FREE(ctx->lightmap.debug); #endif LM_FREE(ctx); } void lmSetHemisphereWeights(lm_context *ctx, lm_weight_func f, void *userdata) { // hemisphere weights texture. bakes in material dependent attenuation behaviour. float *weights = (float*)LM_CALLOC(2 * 3 * ctx->hemisphere.size * ctx->hemisphere.size, sizeof(float)); float center = (ctx->hemisphere.size - 1) * 0.5f; double sum = 0.0; for (unsigned int y = 0; y < ctx->hemisphere.size; y++) { float dy = 2.0f * (y - center) / (float)ctx->hemisphere.size; for (unsigned int x = 0; x < ctx->hemisphere.size; x++) { float dx = 2.0f * (x - center) / (float)ctx->hemisphere.size; lm_vec3 v = lm_normalize3(lm_v3(dx, dy, 1.0f)); float solidAngle = v.z * v.z * v.z; float *w0 = weights + 2 * (y * (3 * ctx->hemisphere.size) + x); float *w1 = w0 + 2 * ctx->hemisphere.size; float *w2 = w1 + 2 * ctx->hemisphere.size; // center weights w0[0] = solidAngle * f(v.z, userdata); w0[1] = solidAngle; // left/right side weights w1[0] = solidAngle * f(lm_absf(v.x), userdata); w1[1] = solidAngle; // up/down side weights w2[0] = solidAngle * f(lm_absf(v.y), userdata); w2[1] = solidAngle; sum += 3.0 * (double)solidAngle; } } // normalize weights float weightScale = (float)(1.0 / sum); for (unsigned int i = 0; i < 2 * 3 * ctx->hemisphere.size * ctx->hemisphere.size; i++) weights[i] *= weightScale; // upload weight texture glBindTexture(GL_TEXTURE_2D, ctx->hemisphere.firstPass.weightsTexture); glTexImage2D(GL_TEXTURE_2D, 0, GL_RG32F, 3 * ctx->hemisphere.size, ctx->hemisphere.size, 0, GL_RG, GL_FLOAT, weights); LM_FREE(weights); } static void lm_initStorage(lm_context *ctx, int w, int h) { // allocate storage texture if (!ctx->hemisphere.storage.texture) glGenTextures(1, &ctx->hemisphere.storage.texture); ctx->hemisphere.storage.width = w; ctx->hemisphere.storage.height = h; glBindTexture(GL_TEXTURE_2D, ctx->hemisphere.storage.texture); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, w, h, 0, GL_RGBA, GL_FLOAT, 0); // allocate storage position to lightmap position map if (ctx->hemisphere.storage.toLightmapLocation) LM_FREE(ctx->hemisphere.storage.toLightmapLocation); ctx->hemisphere.storage.toLightmapLocation = (lm_ivec2*)LM_CALLOC(w * h, sizeof(lm_ivec2)); // invalidate all positions for (int i = 0; i < w * h; i++) ctx->hemisphere.storage.toLightmapLocation[i].x = -1; } void lmSetTargetLightmap(lm_context *ctx, float *outLightmap, int w, int h, int c) { ctx->lightmap.data = outLightmap; ctx->lightmap.width = w; ctx->lightmap.height = h; ctx->lightmap.channels = c; unsigned int sw = w > ctx->hemisphere.fbHemiCountX ? w : ctx->hemisphere.fbHemiCountX; unsigned int sh = h > ctx->hemisphere.fbHemiCountY ? h : ctx->hemisphere.fbHemiCountY; lm_initStorage(ctx, sw, sh); #ifdef LM_DEBUG_INTERPOLATION if (ctx->lightmap.debug) LM_FREE(ctx->lightmap.debug); ctx->lightmap.debug = (unsigned char*)LM_CALLOC(ctx->lightmap.width * ctx->lightmap.height, 3); #endif } void lmSetGeometry(lm_context *ctx, const float *transformationMatrix, lm_type positionsType, const void *positionsXYZ, int positionsStride, lm_type normalsType, const void *normalsXYZ, int normalsStride, lm_type lightmapCoordsType, const void *lightmapCoordsUV, int lightmapCoordsStride, int count, lm_type indicesType, const void *indices) { ctx->mesh.modelMatrix = transformationMatrix; ctx->mesh.positions = (const unsigned char*)positionsXYZ; ctx->mesh.positionsType = positionsType; ctx->mesh.positionsStride = positionsStride == 0 ? sizeof(lm_vec3) : positionsStride; ctx->mesh.normals = (const unsigned char*)normalsXYZ; ctx->mesh.normalsType = normalsType; ctx->mesh.normalsStride = normalsStride == 0 ? sizeof(lm_vec3) : normalsStride; ctx->mesh.uvs = (const unsigned char*)lightmapCoordsUV; ctx->mesh.uvsType = lightmapCoordsType; ctx->mesh.uvsStride = lightmapCoordsStride == 0 ? sizeof(lm_vec2) : lightmapCoordsStride; ctx->mesh.indicesType = indicesType; ctx->mesh.indices = (const unsigned char*)indices; ctx->mesh.count = count; lm_inverseTranspose(transformationMatrix, ctx->mesh.normalMatrix); ctx->meshPosition.pass = 0; lm_setMeshPosition(ctx, 0); } lm_bool lmBegin(lm_context *ctx, int* outViewport4, float* outView4x4, float* outProjection4x4) { assert(ctx->meshPosition.triangle.baseIndex < ctx->mesh.count); while (!lm_beginSampleHemisphere(ctx, outViewport4, outView4x4, outProjection4x4)) { // as long as there are no hemisphere sides to render... // try moving to the next rasterizer position if (lm_findNextConservativeTriangleRasterizerPosition(ctx)) { // if we successfully moved to the next sample position on the current triangle... ctx->meshPosition.hemisphere.side = 0; // start sampling a hemisphere there } else { // if there are no valid sample positions on the current triangle... if (ctx->meshPosition.triangle.baseIndex + 3 < ctx->mesh.count) { // ...and there are triangles left: move to the next triangle and continue sampling. lm_setMeshPosition(ctx, ctx->meshPosition.triangle.baseIndex + 3); } else { // ...and there are no triangles left: finish if (lm_integrateHemisphereBatch(ctx)) // integrate and store last batch lm_writeResultsToLightmap(ctx); // read storage data from gpu memory and write it to the lightmap if (++ctx->meshPosition.pass == ctx->meshPosition.passCount) { ctx->meshPosition.pass = 0; ctx->meshPosition.triangle.baseIndex = ctx->mesh.count; // set end condition (in case someone accidentally calls lmBegin again) #ifdef LM_DEBUG_INTERPOLATION lmImageSaveTGAub("debug_interpolation.tga", ctx->lightmap.debug, ctx->lightmap.width, ctx->lightmap.height, 3); // lightmap texel statistics int rendered = 0, interpolated = 0, wasted = 0; for (int y = 0; y < ctx->lightmap.height; y++) { for (int x = 0; x < ctx->lightmap.width; x++) { if (ctx->lightmap.debug[(y * ctx->lightmap.width + x) * 3 + 0]) rendered++; else if (ctx->lightmap.debug[(y * ctx->lightmap.width + x) * 3 + 1]) interpolated++; else wasted++; } } int used = rendered + interpolated; int total = used + wasted; printf("\n#######################################################################\n"); printf("%10d %6.2f%% rendered hemicubes integrated to lightmap texels.\n", rendered, 100.0f * (float)rendered / (float)total); printf("%10d %6.2f%% interpolated lightmap texels.\n", interpolated, 100.0f * (float)interpolated / (float)total); printf("%10d %6.2f%% wasted lightmap texels.\n", wasted, 100.0f * (float)wasted / (float)total); printf("\n%17.2f%% of used texels were rendered.\n", 100.0f * (float)rendered / (float)used); printf("#######################################################################\n"); #endif return LM_FALSE; } lm_setMeshPosition(ctx, 0); // start over with the next pass } } } return LM_TRUE; } float lmProgress(lm_context *ctx) { float passProgress = (float)ctx->meshPosition.triangle.baseIndex / (float)ctx->mesh.count; return ((float)ctx->meshPosition.pass + passProgress) / (float)ctx->meshPosition.passCount; } void lmEnd(lm_context *ctx) { lm_endSampleHemisphere(ctx); } // these are not performance tuned since their impact on the whole lightmapping duration is insignificant float lmImageMin(const float *image, int w, int h, int c, int m) { assert(c > 0 && m); float minValue = FLT_MAX; for (int i = 0; i < w * h; i++) for (int j = 0; j < c; j++) if (m & (1 << j)) minValue = lm_minf(minValue, image[i * c + j]); return minValue; } float lmImageMax(const float *image, int w, int h, int c, int m) { assert(c > 0 && m); float maxValue = 0.0f; for (int i = 0; i < w * h; i++) for (int j = 0; j < c; j++) if (m & (1 << j)) maxValue = lm_maxf(maxValue, image[i * c + j]); return maxValue; } void lmImageAdd(float *image, int w, int h, int c, float value, int m) { assert(c > 0 && m); for (int i = 0; i < w * h; i++) for (int j = 0; j < c; j++) if (m & (1 << j)) image[i * c + j] += value; } void lmImageScale(float *image, int w, int h, int c, float factor, int m) { assert(c > 0 && m); for (int i = 0; i < w * h; i++) for (int j = 0; j < c; j++) if (m & (1 << j)) image[i * c + j] *= factor; } void lmImagePower(float *image, int w, int h, int c, float exponent, int m) { assert(c > 0 && m); for (int i = 0; i < w * h; i++) for (int j = 0; j < c; j++) if (m & (1 << j)) image[i * c + j] = powf(image[i * c + j], exponent); } void lmImageDilate(const float *image, float *outImage, int w, int h, int c) { assert(c > 0 && c <= 4); for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { float color[4]; lm_bool valid = LM_FALSE; for (int i = 0; i < c; i++) { color[i] = image[(y * w + x) * c + i]; valid |= color[i] > 0.0f; } if (!valid) { int n = 0; const int dx[] = { -1, 0, 1, 0 }; const int dy[] = { 0, 1, 0, -1 }; for (int d = 0; d < 4; d++) { int cx = x + dx[d]; int cy = y + dy[d]; if (cx >= 0 && cx < w && cy >= 0 && cy < h) { float dcolor[4]; lm_bool dvalid = LM_FALSE; for (int i = 0; i < c; i++) { dcolor[i] = image[(cy * w + cx) * c + i]; dvalid |= dcolor[i] > 0.0f; } if (dvalid) { for (int i = 0; i < c; i++) color[i] += dcolor[i]; n++; } } } if (n) { float in = 1.0f / n; for (int i = 0; i < c; i++) color[i] *= in; } } for (int i = 0; i < c; i++) outImage[(y * w + x) * c + i] = color[i]; } } } void lmImageSmooth(const float *image, float *outImage, int w, int h, int c) { assert(c > 0 && c <= 4); for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { float color[4] = {0}; int n = 0; for (int dy = -1; dy <= 1; dy++) { int cy = y + dy; for (int dx = -1; dx <= 1; dx++) { int cx = x + dx; if (cx >= 0 && cx < w && cy >= 0 && cy < h) { lm_bool valid = LM_FALSE; for (int i = 0; i < c; i++) valid |= image[(cy * w + cx) * c + i] > 0.0f; if (valid) { for (int i = 0; i < c; i++) color[i] += image[(cy * w + cx) * c + i]; n++; } } } } for (int i = 0; i < c; i++) outImage[(y * w + x) * c + i] = n ? color[i] / n : 0.0f; } } } void lmImageDownsample(const float *image, float *outImage, int w, int h, int c) { assert(c > 0 && c <= 4); for (int y = 0; y < h / 2; y++) { for (int x = 0; x < w / 2; x++) { int p0 = 2 * (y * w + x) * c; int p1 = p0 + w * c; int valid[2][2] = {0}; float sums[4] = {0}; for (int i = 0; i < c; i++) { valid[0][0] |= image[p0 + i] != 0.0f ? 1 : 0; valid[0][1] |= image[p0 + c + i] != 0.0f ? 1 : 0; valid[1][0] |= image[p1 + i] != 0.0f ? 1 : 0; valid[1][1] |= image[p1 + c + i] != 0.0f ? 1 : 0; sums[i] += image[p0 + i] + image[p0 + c + i] + image[p1 + i] + image[p1 + c + i]; } int n = valid[0][0] + valid[0][1] + valid[1][0] + valid[1][1]; int p = (y * w / 2 + x) * c; for (int i = 0; i < c; i++) outImage[p + i] = n ? sums[i] / n : 0.0f; } } } void lmImageFtoUB(const float *image, unsigned char *outImage, int w, int h, int c, float max) { assert(c > 0); float scale = 255.0f / (max != 0.0f ? max : lmImageMax(image, w, h, c, LM_ALL_CHANNELS)); for (int i = 0; i < w * h * c; i++) outImage[i] = (unsigned char)lm_minf(lm_maxf(image[i] * scale, 0.0f), 255.0f); } // TGA output helpers static void lm_swapRandBub(unsigned char *image, int w, int h, int c) { assert(c >= 3); for (int i = 0; i < w * h * c; i += c) LM_SWAP(unsigned char, image[i], image[i + 2]); } lm_bool lmImageSaveTGAub(const char *filename, const unsigned char *image, int w, int h, int c) { assert(c == 1 || c == 3 || c == 4); lm_bool isGreyscale = c == 1; lm_bool hasAlpha = c == 4; unsigned char header[18] = { 0, 0, (unsigned char)(isGreyscale ? 3 : 2), 0, 0, 0, 0, 0, 0, 0, 0, 0, (unsigned char)(w & 0xff), (unsigned char)((w >> 8) & 0xff), (unsigned char)(h & 0xff), (unsigned char)((h >> 8) & 0xff), (unsigned char)(8 * c), (unsigned char)(hasAlpha ? 8 : 0) }; #if defined(_MSC_VER) && _MSC_VER >= 1400 FILE *file; if (fopen_s(&file, filename, "wb") != 0) return LM_FALSE; #else FILE *file = fopen(filename, "wb"); if (!file) return LM_FALSE; #endif fwrite(header, 1, sizeof(header), file); // we make sure to swap it back! trust me. :) if (!isGreyscale) lm_swapRandBub((unsigned char*)image, w, h, c); fwrite(image, 1, w * h * c , file); if (!isGreyscale) lm_swapRandBub((unsigned char*)image, w, h, c); fclose(file); return LM_TRUE; } lm_bool lmImageSaveTGAf(const char *filename, const float *image, int w, int h, int c, float max) { unsigned char *temp = (unsigned char*)LM_CALLOC(w * h * c, sizeof(unsigned char)); lmImageFtoUB(image, temp, w, h, c, max); lm_bool success = lmImageSaveTGAub(filename, temp, w, h, c); LM_FREE(temp); return success; } #endif // LIGHTMAPPER_IMPLEMENTATION