wip: light probes + compute variant

main
Dominik Madarász 2024-08-28 15:21:54 +02:00
parent 0b8d85f81c
commit 61b94ed225
6 changed files with 2427 additions and 21 deletions

View File

@ -61,21 +61,29 @@ int main(int argc, char** argv) {
static bool first_time = true;
static bool animate_probe_pos = false;
static bool follow_cam = false;
static vec3 probe_pos;
if (input_down(KEY_T)) {
animate_probe_pos = !animate_probe_pos;
}
if (input_down(KEY_F)) {
follow_cam = !follow_cam;
}
if (animate_probe_pos) {
probe_pos = vec3(0, 5, 0);
probe_pos.x = sinf(window_time()*2)*2.0f;
}
if (input_down(KEY_SPACE) || first_time || animate_probe_pos) {
if (input_down(KEY_SPACE) || first_time || animate_probe_pos || follow_cam) {
first_time = false;
mat44 probe_proj, probe_view;
if (!animate_probe_pos) {
probe_pos = cam.position;
}
cubemap_bake_begin(&env_probe.cubemap, probe_pos, 1024, 1024);
if (follow_cam) {
probe_pos = cam.position;
}
unsigned tex_size = 256;
cubemap_bake_begin(&env_probe.cubemap, probe_pos, tex_size, tex_size);
while (cubemap_bake_step(&env_probe.cubemap, probe_proj, probe_view)) {
skybox_render(&sky, probe_proj, probe_view);
shader_bind(mdl.program);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,103 @@
// Projects one cubemap face onto 9 spherical-harmonics (SH) coefficients.
//
// The host drives this shader in two dispatches per face, selected by `pass`:
//   pass 0: every invocation evaluates one texel of `inputTexture`; each work
//           group reduces its texels and writes 9 RGB partial sums into
//           `wg_results`.
//   pass 1: work group (0,0) sums all per-group partials and ADDS the result
//           to `sh`, so the coefficients accumulate over successive faces.
//           Pass 1 must be dispatched with the same group counts as pass 0,
//           because the number of partials is derived from gl_NumWorkGroups.
layout(local_size_x = 16, local_size_y = 16) in;

uniform sampler2D inputTexture;

// Both buffers hold tightly packed RGB float triplets. They are declared as
// float arrays on purpose: under std430 a vec3 array has a 16-byte stride,
// which would not match a host buffer sized as N * 3 floats.
// NOTE(review): assumes the host-side vec3 is three packed floats - confirm.
layout(std430, binding = 1) buffer SHCoefficients {
    float sh[27];           // 9 coefficients * RGB
};
layout(std430, binding = 2) buffer WorkGroupResults {
    float wg_results[];     // per work group: 9 coefficients * RGB
};

uniform float skyIntensity;
uniform ivec2 textureSize;
uniform int faceIndex;
uniform int pass;

// Must equal local_size_x * local_size_y and be a power of two (the tree
// reduction below halves the active range on every step).
const uint GROUP_THREADS = 256u;

// Scratch space used to reduce one coefficient at a time across the group.
shared vec3 sh_scratch[GROUP_THREADS];

const float PI = 3.14159265359;

// Per-face basis: forward axis plus the axes that map u/v onto the face.
const vec3 skyDir[6] = vec3[6](
    vec3( 1, 0, 0), vec3(-1, 0, 0),
    vec3( 0, 1, 0), vec3( 0,-1, 0),
    vec3( 0, 0, 1), vec3( 0, 0,-1)
);
const vec3 skyX[6] = vec3[6](
    vec3( 0, 0,-1), vec3( 0, 0, 1),
    vec3( 1, 0, 0), vec3( 1, 0, 0),
    vec3( 1, 0, 0), vec3(-1, 0, 0)
);
const vec3 skyY[6] = vec3[6](
    vec3( 0, 1, 0), vec3( 0, 1, 0),
    vec3( 0, 0,-1), vec3( 0, 0, 1),
    vec3( 0, 1, 0), vec3( 0, 1, 0)
);

// Sums `value` over every invocation of the work group and returns the total
// to all of them. Must be called from uniform control flow: every invocation
// of the group has to reach each barrier.
vec3 reduce_work_group(vec3 value) {
    uint lid = gl_LocalInvocationIndex;

    sh_scratch[lid] = value;
    memoryBarrierShared();
    barrier();

    for (uint stride = GROUP_THREADS / 2u; stride > 0u; stride >>= 1u) {
        if (lid < stride) {
            sh_scratch[lid] += sh_scratch[lid + stride];
        }
        memoryBarrierShared();
        barrier();
    }

    vec3 total = sh_scratch[0];
    // The scratch array is reused by the next call; do not let a fast
    // invocation overwrite slot 0 before everyone has read it.
    barrier();
    return total;
}

// Reads the RGB partial sum of coefficient `coeff` written by work group `group`.
vec3 load_group_result(uint group, uint coeff) {
    uint base = (group * 9u + coeff) * 3u;
    return vec3(wg_results[base], wg_results[base + 1u], wg_results[base + 2u]);
}

// Pass 0: evaluate this invocation's texel and reduce per work group.
void accumulate_face_texels() {
    vec3 coeff[9];
    for (int i = 0; i < 9; ++i) {
        coeff[i] = vec3(0.0);
    }

    // Out-of-range invocations must NOT return early: they still have to take
    // part in the barriers below, so they simply contribute zero.
    ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
    if (all(lessThan(texel, textureSize))) {
        vec2 uv = (vec2(texel) + 0.5) / vec2(textureSize);
        vec3 dir = skyDir[faceIndex] +
                   skyX[faceIndex] * (2.0 * uv.x - 1.0) +
                   skyY[faceIndex] * (1.0 - 2.0 * uv.y);

        // Take the length BEFORE normalizing: 1/l^3 weights the texel by the
        // solid angle it subtends on the cube face. Measuring the length of
        // the already-normalized vector would always yield 1.
        float l = length(dir);
        vec3 n = dir / l;

        vec3 color = textureLod(inputTexture, uv, 0.0).rgb;
        vec3 light = color * (1.0 / (l * l * l)) * skyIntensity;

        coeff[0] = light * 0.282095f;
        coeff[1] = light * (-0.488603f * n.y * 2.0 / 3.0);
        coeff[2] = light * (0.488603f * n.z * 2.0 / 3.0);
        coeff[3] = light * (-0.488603f * n.x * 2.0 / 3.0);
        coeff[4] = light * (1.092548f * n.x * n.y / 4.0);
        coeff[5] = light * (-1.092548f * n.y * n.z / 4.0);
        coeff[6] = light * (0.315392f * (3.0f * n.z * n.z - 1.0f) / 4.0);
        coeff[7] = light * (-1.092548f * n.x * n.z / 4.0);
        coeff[8] = light * (0.546274f * (n.x * n.x - n.y * n.y) / 4.0);
    }

    uint wg_index = gl_WorkGroupID.y * gl_NumWorkGroups.x + gl_WorkGroupID.x;
    for (uint i = 0u; i < 9u; ++i) {
        vec3 total = reduce_work_group(coeff[i]);
        if (gl_LocalInvocationIndex == 0u) {
            uint base = (wg_index * 9u + i) * 3u;
            wg_results[base]      = total.x;
            wg_results[base + 1u] = total.y;
            wg_results[base + 2u] = total.z;
        }
    }
}

// Pass 1: a single work group folds every per-group partial into `sh`.
void combine_work_group_results() {
    // Uniform per work group, so returning here skips no barrier that other
    // invocations of the same group would wait on.
    if (gl_WorkGroupID.x != 0u || gl_WorkGroupID.y != 0u) {
        return;
    }

    vec3 partial[9];
    for (int i = 0; i < 9; ++i) {
        partial[i] = vec3(0.0);
    }

    // Each invocation sums a strided subset of the work-group results.
    uint num_work_groups = gl_NumWorkGroups.x * gl_NumWorkGroups.y;
    for (uint g = gl_LocalInvocationIndex; g < num_work_groups; g += GROUP_THREADS) {
        for (uint j = 0u; j < 9u; ++j) {
            partial[j] += load_group_result(g, j);
        }
    }

    // Merge the per-invocation subsets; without this step only the subsets of
    // the writing invocations would ever reach `sh`.
    for (uint j = 0u; j < 9u; ++j) {
        vec3 total = reduce_work_group(partial[j]);
        if (gl_LocalInvocationIndex == 0u) {
            uint base = j * 3u;
            sh[base]      += total.x;
            sh[base + 1u] += total.y;
            sh[base + 2u] += total.z;
        }
    }
}

void main() {
    // `pass` is a uniform, so this branch is uniform control flow and the
    // barriers inside both paths are legal.
    if (pass == 0) {
        accumulate_face_texels();
    } else {
        combine_work_group_results();
    }
}

View File

@ -383710,9 +383710,92 @@ void cubemap_bake_end(cubemap_t *c, float sky_intensity) {
c->id = 0;
}
#if 0
static unsigned sh_shader = -1, sh_buffer = -1, wg_buffer = -1, u_intensity = -1, u_size = -1, u_face_index = -1, u_texture = -1, u_step = -1, u_pass = -1;
do_once {
sh_shader = compute(vfs_read("shaders/cubemap_sh.glsl"));
glGenBuffers(1, &sh_buffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, sh_buffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, 9 * sizeof(vec3), NULL, GL_DYNAMIC_COPY);
u_texture = glGetUniformLocation(sh_shader, "cubeFace");
u_intensity = glGetUniformLocation(sh_shader, "skyIntensity");
u_size = glGetUniformLocation(sh_shader, "textureSize");
u_face_index = glGetUniformLocation(sh_shader, "faceIndex");
u_step = glGetUniformLocation(sh_shader, "step");
u_pass = glGetUniformLocation(sh_shader, "pass");
}
// Prepare work group buffer
glGenBuffers(1, &wg_buffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, wg_buffer);
int num_work_groups = ((c->width + 15) / 16) * ((c->height + 15) / 16);
glBufferData(GL_SHADER_STORAGE_BUFFER, num_work_groups * 9 * sizeof(vec3), NULL, GL_DYNAMIC_COPY);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, wg_buffer);
// Clear SH buffer
vec3 zero = vec3(0,0,0);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, sh_buffer);
glClearBufferData(GL_SHADER_STORAGE_BUFFER, GL_RGB32F, GL_RGB, GL_FLOAT, &zero);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, sh_buffer);
// Set up render parameters
int step = 16;
shader_bind(sh_shader);
glUniform1f(u_intensity, sky_intensity);
glUniform2i(u_size, c->width, c->height);
for (int i = 0; i < 6; i++) {
// Bind texture to texture unit 0
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, c->textures[i]);
glUniform1i(u_texture, 0);
// Set up face index
glUniform1i(u_face_index, i);
// Dispatch compute shader
glUniform1i(u_pass, 0);
glDispatchCompute((c->width+step-1)/step, (c->height+step-1)/step, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glUniform1i(u_pass, 1);
glDispatchCompute((c->width+step-1)/step, (c->height+step-1)/step, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
// Copy SH coefficients from buffer to array
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, 9 * sizeof(vec3), c->sh);
// Normalize SH coefficients
int total_samples = 16 * 2 * 6;
for (int s = 0; s < 9; s++) {
c->sh[s] = scale3(c->sh[s], 32.f / total_samples);
// c->sh[s] = scale3(c->sh[s], 4.f * M_PI / total_samples);
}
glDeleteBuffers(1, &wg_buffer);
// Generate cubemap texture
glGenTextures(1, &c->id);
glBindTexture(GL_TEXTURE_CUBE_MAP, c->id);
// Copy each face of the cubemap to the cubemap texture
for (int i = 0; i < 6; ++i) {
glCopyImageSubData(c->textures[i], GL_TEXTURE_2D, 0, 0, 0, 0,
c->id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + i, 0, 0, 0, 0,
c->width, c->height, 1);
}
// Generate mipmaps
glGenerateMipmap(GL_TEXTURE_CUBE_MAP);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
#else
int samples = 0;
for (int i = 0; i < 6; i++) {
glBindFramebuffer(GL_FRAMEBUFFER, c->framebuffers[i]);
@ -383756,12 +383839,16 @@ void cubemap_bake_end(cubemap_t *c, float sky_intensity) {
c->sh[s] = scale3(c->sh[s], 32.f / samples);
}
glGenTextures(1, &c->id);
glBindTexture(GL_TEXTURE_CUBE_MAP, c->id);
glGenerateMipmap(GL_TEXTURE_CUBE_MAP);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
#endif
glBindFramebuffer(GL_FRAMEBUFFER, sky_last_fb);
glViewport(sky_last_vp[0], sky_last_vp[1], sky_last_vp[2], sky_last_vp[3]);

View File

@ -2026,9 +2026,92 @@ void cubemap_bake_end(cubemap_t *c, float sky_intensity) {
c->id = 0;
}
#if 0
static unsigned sh_shader = -1, sh_buffer = -1, wg_buffer = -1, u_intensity = -1, u_size = -1, u_face_index = -1, u_texture = -1, u_step = -1, u_pass = -1;
do_once {
sh_shader = compute(vfs_read("shaders/cubemap_sh.glsl"));
glGenBuffers(1, &sh_buffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, sh_buffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, 9 * sizeof(vec3), NULL, GL_DYNAMIC_COPY);
u_texture = glGetUniformLocation(sh_shader, "cubeFace");
u_intensity = glGetUniformLocation(sh_shader, "skyIntensity");
u_size = glGetUniformLocation(sh_shader, "textureSize");
u_face_index = glGetUniformLocation(sh_shader, "faceIndex");
u_step = glGetUniformLocation(sh_shader, "step");
u_pass = glGetUniformLocation(sh_shader, "pass");
}
// Prepare work group buffer
glGenBuffers(1, &wg_buffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, wg_buffer);
int num_work_groups = ((c->width + 15) / 16) * ((c->height + 15) / 16);
glBufferData(GL_SHADER_STORAGE_BUFFER, num_work_groups * 9 * sizeof(vec3), NULL, GL_DYNAMIC_COPY);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, wg_buffer);
// Clear SH buffer
vec3 zero = vec3(0,0,0);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, sh_buffer);
glClearBufferData(GL_SHADER_STORAGE_BUFFER, GL_RGB32F, GL_RGB, GL_FLOAT, &zero);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, sh_buffer);
// Set up render parameters
int step = 16;
shader_bind(sh_shader);
glUniform1f(u_intensity, sky_intensity);
glUniform2i(u_size, c->width, c->height);
for (int i = 0; i < 6; i++) {
// Bind texture to texture unit 0
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, c->textures[i]);
glUniform1i(u_texture, 0);
// Set up face index
glUniform1i(u_face_index, i);
// Dispatch compute shader
glUniform1i(u_pass, 0);
glDispatchCompute((c->width+step-1)/step, (c->height+step-1)/step, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glUniform1i(u_pass, 1);
glDispatchCompute((c->width+step-1)/step, (c->height+step-1)/step, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
// Copy SH coefficients from buffer to array
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, 9 * sizeof(vec3), c->sh);
// Normalize SH coefficients
int total_samples = 16 * 2 * 6;
for (int s = 0; s < 9; s++) {
c->sh[s] = scale3(c->sh[s], 32.f / total_samples);
// c->sh[s] = scale3(c->sh[s], 4.f * M_PI / total_samples);
}
glDeleteBuffers(1, &wg_buffer);
// Generate cubemap texture
glGenTextures(1, &c->id);
glBindTexture(GL_TEXTURE_CUBE_MAP, c->id);
// Copy each face of the cubemap to the cubemap texture
for (int i = 0; i < 6; ++i) {
glCopyImageSubData(c->textures[i], GL_TEXTURE_2D, 0, 0, 0, 0,
c->id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + i, 0, 0, 0, 0,
c->width, c->height, 1);
}
// Generate mipmaps
glGenerateMipmap(GL_TEXTURE_CUBE_MAP);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
#else
int samples = 0;
for (int i = 0; i < 6; i++) {
glBindFramebuffer(GL_FRAMEBUFFER, c->framebuffers[i]);
@ -2072,12 +2155,16 @@ void cubemap_bake_end(cubemap_t *c, float sky_intensity) {
c->sh[s] = scale3(c->sh[s], 32.f / samples);
}
glGenTextures(1, &c->id);
glBindTexture(GL_TEXTURE_CUBE_MAP, c->id);
glGenerateMipmap(GL_TEXTURE_CUBE_MAP);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
#endif
glBindFramebuffer(GL_FRAMEBUFFER, sky_last_fb);
glViewport(sky_last_vp[0], sky_last_vp[1], sky_last_vp[2], sky_last_vp[3]);

View File

@ -18825,9 +18825,92 @@ void cubemap_bake_end(cubemap_t *c, float sky_intensity) {
c->id = 0;
}
#if 0
static unsigned sh_shader = -1, sh_buffer = -1, wg_buffer = -1, u_intensity = -1, u_size = -1, u_face_index = -1, u_texture = -1, u_step = -1, u_pass = -1;
do_once {
sh_shader = compute(vfs_read("shaders/cubemap_sh.glsl"));
glGenBuffers(1, &sh_buffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, sh_buffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, 9 * sizeof(vec3), NULL, GL_DYNAMIC_COPY);
u_texture = glGetUniformLocation(sh_shader, "cubeFace");
u_intensity = glGetUniformLocation(sh_shader, "skyIntensity");
u_size = glGetUniformLocation(sh_shader, "textureSize");
u_face_index = glGetUniformLocation(sh_shader, "faceIndex");
u_step = glGetUniformLocation(sh_shader, "step");
u_pass = glGetUniformLocation(sh_shader, "pass");
}
// Prepare work group buffer
glGenBuffers(1, &wg_buffer);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, wg_buffer);
int num_work_groups = ((c->width + 15) / 16) * ((c->height + 15) / 16);
glBufferData(GL_SHADER_STORAGE_BUFFER, num_work_groups * 9 * sizeof(vec3), NULL, GL_DYNAMIC_COPY);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, wg_buffer);
// Clear SH buffer
vec3 zero = vec3(0,0,0);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, sh_buffer);
glClearBufferData(GL_SHADER_STORAGE_BUFFER, GL_RGB32F, GL_RGB, GL_FLOAT, &zero);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, sh_buffer);
// Set up render parameters
int step = 16;
shader_bind(sh_shader);
glUniform1f(u_intensity, sky_intensity);
glUniform2i(u_size, c->width, c->height);
for (int i = 0; i < 6; i++) {
// Bind texture to texture unit 0
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, c->textures[i]);
glUniform1i(u_texture, 0);
// Set up face index
glUniform1i(u_face_index, i);
// Dispatch compute shader
glUniform1i(u_pass, 0);
glDispatchCompute((c->width+step-1)/step, (c->height+step-1)/step, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glUniform1i(u_pass, 1);
glDispatchCompute((c->width+step-1)/step, (c->height+step-1)/step, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
// Copy SH coefficients from buffer to array
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, 9 * sizeof(vec3), c->sh);
// Normalize SH coefficients
int total_samples = 16 * 2 * 6;
for (int s = 0; s < 9; s++) {
c->sh[s] = scale3(c->sh[s], 32.f / total_samples);
// c->sh[s] = scale3(c->sh[s], 4.f * M_PI / total_samples);
}
glDeleteBuffers(1, &wg_buffer);
// Generate cubemap texture
glGenTextures(1, &c->id);
glBindTexture(GL_TEXTURE_CUBE_MAP, c->id);
// Copy each face of the cubemap to the cubemap texture
for (int i = 0; i < 6; ++i) {
glCopyImageSubData(c->textures[i], GL_TEXTURE_2D, 0, 0, 0, 0,
c->id, GL_TEXTURE_CUBE_MAP_POSITIVE_X + i, 0, 0, 0, 0,
c->width, c->height, 1);
}
// Generate mipmaps
glGenerateMipmap(GL_TEXTURE_CUBE_MAP);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
#else
int samples = 0;
for (int i = 0; i < 6; i++) {
glBindFramebuffer(GL_FRAMEBUFFER, c->framebuffers[i]);
@ -18871,12 +18954,16 @@ void cubemap_bake_end(cubemap_t *c, float sky_intensity) {
c->sh[s] = scale3(c->sh[s], 32.f / samples);
}
glGenTextures(1, &c->id);
glBindTexture(GL_TEXTURE_CUBE_MAP, c->id);
glGenerateMipmap(GL_TEXTURE_CUBE_MAP);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
#endif
glBindFramebuffer(GL_FRAMEBUFFER, sky_last_fb);
glViewport(sky_last_vp[0], sky_last_vp[1], sky_last_vp[2], sky_last_vp[3]);