engine/dep/include/FidelityFX/gpu/brixelizer/ffx_brixelizer_cascade_ops.h

1477 lines
82 KiB
C

// This file is part of the FidelityFX SDK.
//
// Copyright (C) 2024 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef FFX_BRIXELIZER_CASCADE_OPS_H
#define FFX_BRIXELIZER_CASCADE_OPS_H
#include "ffx_brixelizer_host_gpu_shared_private.h"
#include "ffx_brixelizer_brick_common_private.h"
#include "ffx_brixelizer_build_common.h"
#include "ffx_brixelizer_common_private.h"
#include "ffx_brixelizer_mesh_common.h"
// Zeroes the scratch state kept for entry `idx`: both the CR1 reference
// counter and the voxel allocation-fail counter are overwritten with 0.
void FfxBrixelizerClearRefCounter(FfxUInt32 idx)
{
    const FfxUInt32 cleared = FfxUInt32(0);

    StoreScratchCR1RefCounter(idx, cleared);
    StoreScratchVoxelAllocationFailCounter(idx, cleared);
}
// Search for n where a[n] <= offset and a[n+1] > offset
//
// Binary search over the indices [0, total_count). Requirements at the
// expansion site:
//   - LOWER_BOUND_LOAD(i) must be #defined to fetch element i of the sequence.
//   - LOWER_BOUND_RESULT must name an assignable FfxUInt32 that receives the answer.
// The loop leaves `cursor` on the first index whose element is > offset (or on
// total_count when no such element exists). The result is cursor - 1, clamped
// to 0 so that an offset smaller than every element still yields index 0.
// NOTE(review): the search is only meaningful if the loaded sequence is
// non-decreasing - confirm for each LOWER_BOUND_LOAD that is plugged in.
#define LOWER_BOUND(offset, total_count) \
{ \
FfxUInt32 cursor = FfxUInt32(0); \
FfxUInt32 size = total_count; \
while (size > FfxUInt32(0)) { \
FfxUInt32 size_half = size >> FfxUInt32(1); \
FfxUInt32 mid = cursor + size_half; \
if (LOWER_BOUND_LOAD(mid) > offset) \
size = size_half; \
else { \
cursor = mid + FfxUInt32(1); \
size = size - size_half - FfxUInt32(1); \
} \
} \
\
LOWER_BOUND_RESULT = max(cursor, FfxUInt32(1)) - FfxUInt32(1); \
}
// Adds the cascade's clipmap offset to a 3D voxel coordinate and masks every
// component with FFX_BRIXELIZER_CASCADE_WRAP_MASK.
FfxUInt32x3 WrapCoords(FfxUInt32x3 voxel_coord)
{
    FfxUInt32x3 shifted_coord = voxel_coord + GetCascadeInfoClipmapOffset();
    return shifted_coord & FFX_BROADCAST_UINT32X3(FFX_BRIXELIZER_CASCADE_WRAP_MASK);
}
// Flat-index variant of WrapCoords: unflattens `voxel_idx`, applies the clipmap
// offset and wrap mask, then flattens the wrapped coordinate again.
FfxUInt32 WrapFlatCoords(FfxUInt32 voxel_idx)
{
    FfxUInt32x3 unwrapped_coord = FfxBrixelizerUnflattenPOT(voxel_idx, FFX_BRIXELIZER_CASCADE_DEGREE);
    FfxUInt32x3 wrapped_coord   = WrapCoords(unwrapped_coord);
    return FfxBrixelizerFlattenPOT(wrapped_coord, FFX_BRIXELIZER_CASCADE_DEGREE);
}
// Flags voxel `flat_voxel_idx` as failed by writing 1 into its slot of the
// voxel allocation-fail counter buffer.
void MarkFailed(FfxUInt32 flat_voxel_idx)
{
    const FfxUInt32 failed_flag = FfxUInt32(1);
    StoreScratchVoxelAllocationFailCounter(flat_voxel_idx, failed_flag);
}
// A voxel is buildable when its (wrapped) cascade brick-map entry still holds
// FFX_BRIXELIZER_UNINITIALIZED_ID.
FfxBoolean IsBuildable(FfxUInt32 voxel_idx)
{
    FfxUInt32 mapped_brick = LoadCascadeBrickMap(WrapFlatCoords(voxel_idx));
    return mapped_brick == FFX_BRIXELIZER_UNINITIALIZED_ID;
}
// Returns true when the voxel may be built. Currently this is exactly the
// IsBuildable() test.
FfxBoolean CanBuildThisVoxel(FfxUInt32 flat_voxel_idx)
{
    return IsBuildable(flat_voxel_idx);
}
// Records that triangle `triangle_id` touches voxel `voxel_idx`.
//
// Does nothing for voxels that cannot be built. Otherwise the voxel's CR1
// reference counter and the global REFERENCES counter are both incremented;
// if the global slot obtained is below MAX_REFERENCES the reference is stored
// there, else the voxel is marked as failed.
void AddReferenceOrMarkVoxelFailed(FfxUInt32 voxel_idx, FfxUInt32 triangle_id)
{
    if (!CanBuildThisVoxel(voxel_idx)) {
        return;
    }

    // Slot of this reference within the voxel. The counter is bumped even if
    // the global list later turns out to be full.
    FfxUInt32 slot_in_voxel;
    IncrementScratchCR1RefCounter(voxel_idx, FfxUInt32(1), slot_in_voxel);

    // Slot of this reference in the global (coarse) reference list.
    FfxUInt32 global_slot;
    IncrementScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_REFERENCES, FfxUInt32(1), global_slot);

    if (global_slot >= LoadScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_MAX_REFERENCES)) {
        MarkFailed(voxel_idx);
        return;
    }

    FfxBrixelizerTriangleReference ref;
    ref.voxel_idx     = voxel_idx;
    ref.triangle_id   = triangle_id;
    ref.local_ref_idx = slot_in_voxel;
    StoreScratchCR1Reference(global_slot, ref);
}
// Returns the reference offset of voxel `voxel_idx`: the scan value of the
// scan group containing the voxel plus the voxel's own scan value.
FfxUInt32 GetReferenceOffset(FfxUInt32 voxel_idx)
{
    FfxUInt32 owning_group = voxel_idx / FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE;
    return LoadVoxelReferenceGroupSum(owning_group) + LoadScratchCR1RefCounterScan(voxel_idx);
}
// Packed form of FfxBrixelizerCRItem as written by FfxBrixelizerCRStoreItem.
// Each word holds three 10-bit fields: x in bits 0-9, y in bits 10-19,
// z in bits 20-29.
struct FfxBrixelizerCRItemPacked {
FfxUInt32 pack0; // bounds_min (x | y << 10 | z << 20)
FfxUInt32 pack1; // bounds_max (x | y << 10 | z << 20)
};
// Unpacked voxelizer work item: an integer bounding box. The voxelizer fills
// it from FfxBrixelizerCRVoxelTriangleBounds::ubound_min / ubound_max and
// treats (bounds_max - bounds_min) as the box dimensions. Only the low 10 bits
// of each component survive a store/load round trip.
struct FfxBrixelizerCRItem {
FfxUInt32x3 bounds_min;
FfxUInt32x3 bounds_max;
};
// Group-shared state of the voxelizer pass (see FfxBrixelizerVoxelize).
// Per-item reference count (dim.x * dim.y * dim.z, or dim.x * dim.z in the 2D variant).
FFX_GROUPSHARED FfxUInt32 gs_ffx_brixelizer_voxelizer_items_ref_count[FFX_BRIXELIZER_STATIC_CONFIG_VOXELIZER_GROUP_SIZE];
// Per-item word, initialised to 0 when the item is appended. With
// FFX_BRIXELIZER_VOXELIZER_CHECK_BRICKS bit 0 is OR-ed in when a covered voxel can be built.
FFX_GROUPSHARED FfxUInt32 gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[FFX_BRIXELIZER_STATIC_CONFIG_VOXELIZER_GROUP_SIZE];
// Packed voxel bounds per item; accessed through FfxBrixelizerCRStoreItem / FfxBrixelizerCRLoadItem.
FFX_GROUPSHARED FfxBrixelizerCRItemPacked gs_ffx_brixelizer_voxelizer_items[FFX_BRIXELIZER_STATIC_CONFIG_VOXELIZER_GROUP_SIZE];
// Number of items appended by the group (atomically incremented once per colliding triangle).
FFX_GROUPSHARED FfxUInt32 gs_ffx_brixelizer_voxelizer_item_counter;
// Sum of the reference counts of all appended items.
FFX_GROUPSHARED FfxUInt32 gs_ffx_brixelizer_voxelizer_ref_counter;
// The next three are zeroed by thread 0 at the start of FfxBrixelizerVoxelize.
FFX_GROUPSHARED FfxUInt32 gs_ffx_brixelizer_voxelizer_ref_offset;
FFX_GROUPSHARED FfxUInt32 gs_ffx_brixelizer_triangle_offset_global;
FFX_GROUPSHARED FfxUInt32 gs_ffx_brixelizer_triangle_offset;
// NOTE(review): presumably set when the triangle swap storage has room for the group's data - confirm in FfxBrixelizerVoxelize.
FFX_GROUPSHARED FfxBoolean gs_ffx_brixelizer_voxelizer_has_space;
// Packs `item` into two 32-bit words (10 bits per component, x lowest) and
// writes it to slot `index` of the group-shared item array.
void FfxBrixelizerCRStoreItem(FfxInt32 index, FfxBrixelizerCRItem item)
{
    const FfxUInt32 field_mask = FfxUInt32(0x3ff);
    const FfxUInt32 y_shift    = FfxUInt32(10);
    const FfxUInt32 z_shift    = FfxUInt32(20);

    FfxBrixelizerCRItemPacked packed;
    packed.pack0 = (item.bounds_min.x & field_mask) |
                   ((item.bounds_min.y & field_mask) << y_shift) |
                   ((item.bounds_min.z & field_mask) << z_shift);
    packed.pack1 = (item.bounds_max.x & field_mask) |
                   ((item.bounds_max.y & field_mask) << y_shift) |
                   ((item.bounds_max.z & field_mask) << z_shift);

    gs_ffx_brixelizer_voxelizer_items[index] = packed;
}
// Reads slot `index` of the group-shared item array and expands the two packed
// words back into 10-bit min / max bounds (inverse of FfxBrixelizerCRStoreItem).
FfxBrixelizerCRItem FfxBrixelizerCRLoadItem(FfxInt32 index)
{
    const FfxUInt32 field_mask = FfxUInt32(0x3ff);
    const FfxUInt32 y_shift    = FfxUInt32(10);
    const FfxUInt32 z_shift    = FfxUInt32(20);

    FfxUInt32 min_word = gs_ffx_brixelizer_voxelizer_items[index].pack0;
    FfxUInt32 max_word = gs_ffx_brixelizer_voxelizer_items[index].pack1;

    FfxBrixelizerCRItem item;
    item.bounds_min = FfxUInt32x3(min_word & field_mask,
                                  (min_word >> y_shift) & field_mask,
                                  (min_word >> z_shift) & field_mask);
    item.bounds_max = FfxUInt32x3(max_word & field_mask,
                                  (max_word >> y_shift) & field_mask,
                                  (max_word >> z_shift) & field_mask);
    return item;
}
// Bounds of one triangle as produced by GetTriangleBounds.
struct FfxBrixelizerCRVoxelTriangleBounds {
FfxFloat32x3 bound_min;  // component-wise min of the three vertices (positions are relative to the cascade grid min)
FfxFloat32x3 bound_max;  // component-wise max of the three vertices
FfxUInt32x3 ubound_min;  // inflated min bound in voxel units, clamped to [0, CASCADE_RESOLUTION - 1]
FfxUInt32x3 ubound_max;  // inflated max bound in voxel units, clamped likewise and then incremented by 1
};
// Fetches triangle `triangle_index` of the given instance and returns it with
// its three vertices translated so that the cascade grid min is the origin.
// `job_idx` and `triangle_index` are copied into the result unchanged.
FfxBrixelizerTriangle FetchTriangle(FfxBrixelizerBasicMeshInfo instance_info, FfxUInt32 instance_id, FfxUInt32 job_idx, FfxUInt32 triangle_index)
{
    FfxBrixelizerTrianglePos fetched = FfxBrixelizerFetchTriangle(instance_info, instance_id, triangle_index);

    FfxBrixelizerTriangle tri;

    // Grid-relative vertex positions.
    tri.wp0 = FfxFloat32x3(fetched.wp0 - GetCascadeInfoGridMin());
    tri.wp1 = FfxFloat32x3(fetched.wp1 - GetCascadeInfoGridMin());
    tri.wp2 = FfxFloat32x3(fetched.wp2 - GetCascadeInfoGridMin());

    // Bookkeeping carried along with the geometry.
    tri.face3          = fetched.face3;
    tri.job_idx        = job_idx;
    tri.triangle_index = triangle_index;

    return tri;
}
// Fetches a triangle and computes its bounds relative to the cascade grid.
//
// Returns false, leaving `tri` and `tvbounds` unwritten, when `triangle_index`
// is not below instance_info.triangleCount. Otherwise fills `tri` and
// `tvbounds` and returns true when the triangle's AABB, grown by the inflation
// margin (voxel size / 7), overlaps the cascade grid on every axis.
FfxBoolean GetTriangleBounds(FfxUInt32 instance_id, FfxUInt32 job_idx, FfxBrixelizerBasicMeshInfo instance_info, FfxUInt32 triangle_index, FFX_PARAMETER_OUT FfxBrixelizerTriangle tri,
                             FFX_PARAMETER_OUT FfxBrixelizerCRVoxelTriangleBounds tvbounds)
{
    FfxUInt32 triangle_total = instance_info.triangleCount;
    if (!(triangle_index < triangle_total)) {
        return false;
    }

    tri = FetchTriangle(instance_info, instance_id, job_idx, triangle_index);

    FfxFloat32   inflation_size = FfxFloat32(GetCascadeInfoVoxelSize() / FfxFloat32(7.0));
    FfxFloat32x3 inflation      = FFX_BROADCAST_FLOAT32X3(inflation_size);

    // Float AABB of the three vertices.
    tvbounds.bound_min = FfxFloat32x3(min(tri.wp0.x, min(tri.wp1.x, tri.wp2.x)),
                                      min(tri.wp0.y, min(tri.wp1.y, tri.wp2.y)),
                                      min(tri.wp0.z, min(tri.wp1.z, tri.wp2.z)));
    tvbounds.bound_max = FfxFloat32x3(max(tri.wp0.x, max(tri.wp1.x, tri.wp2.x)),
                                      max(tri.wp0.y, max(tri.wp1.y, tri.wp2.y)),
                                      max(tri.wp0.z, max(tri.wp1.z, tri.wp2.z)));

    // Components that are not strictly positive are pushed down by 1 before
    // the float -> int conversion below.
    // NOTE(review): presumably compensates for truncation toward zero - confirm.
    FfxFloat32x3 biased_min = tvbounds.bound_min;
    if (!(biased_min.x > FfxFloat32(0.0))) biased_min.x = biased_min.x - FfxFloat32(1.0);
    if (!(biased_min.y > FfxFloat32(0.0))) biased_min.y = biased_min.y - FfxFloat32(1.0);
    if (!(biased_min.z > FfxFloat32(0.0))) biased_min.z = biased_min.z - FfxFloat32(1.0);

    FfxFloat32x3 biased_max = tvbounds.bound_max;
    if (!(biased_max.x > FfxFloat32(0.0))) biased_max.x = biased_max.x - FfxFloat32(1.0);
    if (!(biased_max.y > FfxFloat32(0.0))) biased_max.y = biased_max.y - FfxFloat32(1.0);
    if (!(biased_max.z > FfxFloat32(0.0))) biased_max.z = biased_max.z - FfxFloat32(1.0);

    // Inflated bounds in voxel units, clamped into the cascade. The max bound
    // is incremented by 1 after clamping.
    FfxInt32x3 first_cell = FFX_BROADCAST_INT32X3(0);
    FfxInt32x3 last_cell  = FFX_BROADCAST_INT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION - FfxUInt32(1));
    FfxInt32x3 min_cell   = FfxInt32x3((biased_min - inflation) / FfxFloat32(GetCascadeInfoVoxelSize()));
    FfxInt32x3 max_cell   = FfxInt32x3((biased_max + inflation) / FfxFloat32(GetCascadeInfoVoxelSize()));

    tvbounds.ubound_min = min(last_cell, max(first_cell, min_cell));
    tvbounds.ubound_max = min(last_cell, max(first_cell, max_cell)) + FFX_BROADCAST_INT32X3(1);

    // Overlap test of the inflated float AABB against [0, grid_max - grid_min].
    FfxFloat32x3 grid_extent       = FfxFloat32x3(GetCascadeInfoGridMax() - GetCascadeInfoGridMin());
    FfxBoolean   min_inside_extent = all(FFX_LESS_THAN_EQUAL(tvbounds.bound_min, grid_extent + inflation));
    FfxBoolean   max_inside_extent = all(FFX_GREATER_THAN_EQUAL(tvbounds.bound_max, FFX_BROADCAST_FLOAT32X3(0.0) + FFX_BROADCAST_FLOAT32X3(-inflation_size)));
    return min_inside_extent && max_inside_extent;
}
// Stores the centroid (mean of the three vertices) of `tri` in the index-swap
// scratch buffer. `triangle_id_swap_offset` is divided by
// FFX_BRIXELIZER_SIZEOF_UINT to obtain the element index.
void FfxBrixelizerStoreTriangleCenter(FfxUInt32 triangle_id_swap_offset, FfxBrixelizerTriangle tri)
{
    FfxFloat32x3 centroid = (tri.wp0 + tri.wp1 + tri.wp2) / FfxFloat32(3.0);
    StoreScratchIndexSwapFloat3(triangle_id_swap_offset / FFX_BRIXELIZER_SIZEOF_UINT, centroid);
}
// Reads back a centroid written by FfxBrixelizerStoreTriangleCenter, using the
// same offset-to-element-index conversion.
FfxFloat32x3 FfxBrixelizerLoadTriangleCenter(FfxUInt32 triangle_id_swap_offset)
{
    FfxFloat32x3 centroid = LoadScratchIndexSwapFloat3(triangle_id_swap_offset / FFX_BRIXELIZER_SIZEOF_UINT);
    return centroid;
}
// Geometry-only triangle (three vertex positions), as reconstructed by
// FfxBrixelizerLoadTrianglePartial.
struct FfxBrixelizerTrianglePartial {
FfxFloat32x3 wp0;
FfxFloat32x3 wp1;
FfxFloat32x3 wp2;
};
// Six-word storage layout of a triangle in the index-swap scratch buffer
// (see FfxBrixelizerStoreTrianglePartial / FfxBrixelizerLoadTrianglePartial).
struct FfxBrixelizerTrianglePartialCompressed {
FfxUInt32x2 wp0xy; // raw float bits of wp0.x and wp0.y
FfxUInt32x2 ed0;   // ffxPackF32x2 of (e0.x, e0.y, e0.z, e1.x), where e0 = wp1 - wp0 and e1 = wp2 - wp0
FfxUInt32x2 ed1;   // .x = ffxPackF32 of (e1.y, e1.z); .y = raw float bits of wp0.z
};
// Compresses a triangle to a full-precision first vertex plus two packed edge
// vectors (e0 = wp1 - wp0, e1 = wp2 - wp0) and writes the six resulting words
// to the index-swap scratch buffer.
//
// Word layout relative to triangle_id_swap_offset / FFX_BRIXELIZER_SIZEOF_UINT:
//   +0,+1 : raw bits of wp0.xy
//   +2,+3 : packed (e0.x, e0.y, e0.z, e1.x)
//   +4    : packed (e1.y, e1.z)
//   +5    : raw bits of wp0.z
void FfxBrixelizerStoreTrianglePartial(FfxUInt32 triangle_id_swap_offset, FfxBrixelizerTriangle tri)
{
    const FfxUInt32 base_word = triangle_id_swap_offset / FFX_BRIXELIZER_SIZEOF_UINT;

    FfxFloat32x3 edge01 = tri.wp1 - tri.wp0;
    FfxFloat32x3 edge02 = tri.wp2 - tri.wp0;

    FfxUInt32x2 words01 = ffxAsUInt32(tri.wp0.xy);
    FfxUInt32x2 words23 = ffxPackF32x2(FfxFloat32x4(edge01.xyz, edge02.x));
    FfxUInt32x2 words45;
    words45.x = ffxPackF32(edge02.yz);
    words45.y = ffxAsUInt32(tri.wp0.z);

    StoreScratchIndexSwapUInt2(base_word + FfxUInt32(0), words01);
    StoreScratchIndexSwapUInt2(base_word + FfxUInt32(2), words23);
    StoreScratchIndexSwapUInt2(base_word + FfxUInt32(4), words45);
}
// Inverse of FfxBrixelizerStoreTrianglePartial: reads the six stored words and
// rebuilds the three vertices as wp0, wp0 + e0 and wp0 + e1.
void FfxBrixelizerLoadTrianglePartial(FfxUInt32 triangle_id_swap_offset, FFX_PARAMETER_OUT FfxBrixelizerTrianglePartial tri)
{
    const FfxUInt32 base_word = triangle_id_swap_offset / FFX_BRIXELIZER_SIZEOF_UINT;

    FfxUInt32x2 words01 = LoadScratchIndexSwapUInt2(base_word + FfxUInt32(0));
    FfxUInt32x2 words23 = LoadScratchIndexSwapUInt2(base_word + FfxUInt32(2));
    FfxUInt32x2 words45 = LoadScratchIndexSwapUInt2(base_word + FfxUInt32(4));

    // First vertex is stored as raw float bits.
    tri.wp0.xy = ffxAsFloat(words01.xy);
    tri.wp0.z  = ffxAsFloat(words45.y);

    // Edges are stored packed: (e0.xyz, e1.x) followed by (e1.yz).
    FfxFloat32x4 edge01_and_e1x = ffxUnpackF32x2(words23);
    FfxFloat32x2 e1yz           = ffxUnpackF32(words45.x);

    tri.wp1 = tri.wp0 + FfxFloat32x3(edge01_and_e1x.xyz);
    tri.wp2 = tri.wp0 + FfxFloat32x3(edge01_and_e1x.w, e1yz.xy);
}
// Integer scan
// Working buffer of GroupScanExclusiveAdd: callers write one input value per
// thread, and after the scan each slot holds that thread's exclusive prefix sum.
FFX_GROUPSHARED FfxUInt32 gs_ffx_brixelizer_scan_buffer[FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE];
// Shared scalar used two ways: FfxBrixelizerScanJobs stores the value returned by
// the GROUP_INDEX counter increment here, and GROUP_SCAN uses it as its running total.
FFX_GROUPSHARED FfxUInt32 gs_ffx_brixelizer_scan_group_id;
// In-place exclusive prefix sum over gs_ffx_brixelizer_scan_buffer[0 .. group_size).
//
// gid        - index of the calling thread within the group.
// group_size - number of buffer entries to scan.
// Returns the total of all inputs on the thread with gid == 0; every other
// thread returns 0.
//
// Every thread of the group must call this function, because it contains group
// barriers. The caller must fill the buffer and issue a barrier beforehand.
// NOTE(review): the index arithmetic appears to assume group_size is a power of
// two no larger than the buffer size - confirm at call sites.
FfxUInt32 GroupScanExclusiveAdd(FfxUInt32 gid, FfxUInt32 group_size)
{
FfxUInt32 sum = FfxUInt32(0);
// Up-sweep: at each stride, add the left partial sum into the right one, so
// the last slot ends up holding the total.
for (FfxUInt32 stride = FfxUInt32(1); stride <= (group_size >> FfxUInt32(1)); stride <<= FfxUInt32(1)) {
if (gid < group_size / (FfxUInt32(2) * stride)) {
gs_ffx_brixelizer_scan_buffer[FfxUInt32(2) * (gid + FfxUInt32(1)) * stride - FfxUInt32(1)] += gs_ffx_brixelizer_scan_buffer[(FfxUInt32(2) * gid + FfxUInt32(1)) * stride - FfxUInt32(1)];
}
FFX_GROUP_MEMORY_BARRIER;
}
// Thread 0 captures the total and replaces it with 0, which seeds the down-sweep.
if (gid == FfxUInt32(0)) {
sum = gs_ffx_brixelizer_scan_buffer[group_size - FfxUInt32(1)];
gs_ffx_brixelizer_scan_buffer[group_size - FfxUInt32(1)] = FfxUInt32(0);
}
FFX_GROUP_MEMORY_BARRIER;
// Down-sweep: at each stride, the left slot takes the right slot's value and
// the right slot becomes (right + old left).
for (FfxUInt32 stride = (group_size >> FfxUInt32(1)); stride > FfxUInt32(0); stride >>= FfxUInt32(1)) {
if (gid < group_size / (FfxUInt32(2) * stride)) {
FfxUInt32 tmp = gs_ffx_brixelizer_scan_buffer[(FfxUInt32(2) * gid + FfxUInt32(1)) * stride - FfxUInt32(1)];
gs_ffx_brixelizer_scan_buffer[(FfxUInt32(2) * gid + FfxUInt32(1)) * stride - FfxUInt32(1)] = gs_ffx_brixelizer_scan_buffer[FfxUInt32(2) * (gid + FfxUInt32(1)) * stride - FfxUInt32(1)];
gs_ffx_brixelizer_scan_buffer[FfxUInt32(2) * (gid + FfxUInt32(1)) * stride - FfxUInt32(1)] = gs_ffx_brixelizer_scan_buffer[FfxUInt32(2) * (gid + FfxUInt32(1)) * stride - FfxUInt32(1)] + tmp;
}
FFX_GROUP_MEMORY_BARRIER;
}
return sum;
}
// Returns the global triangle-count scan value for job `job_idx`: the job's
// value within its scan group plus the scan value of that group.
FfxUInt32 LoadJobTriangleCountScan(FfxUInt32 job_idx)
{
    FfxUInt32 scan_in_group = LoadScratchJobCountersScan(job_idx);
    FfxUInt32 group_base    = LoadGlobalJobTriangleCounterScan(job_idx / FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE);
    return scan_in_group + group_base;
}
// Binary-searches the job triangle-count scan (LoadJobTriangleCountScan) over
// [0, total_job_count) and returns the last job index whose scan value is
// <= triangle_offset, or 0 when there is none. FfxBrixelizerVoxelize uses it
// to map a flat triangle offset to a job index.
FfxUInt32 JobLowerBound(FfxUInt32 triangle_offset, FfxUInt32 total_job_count)
{
// LOWER_BOUND reads elements through LOWER_BOUND_LOAD and writes its answer to
// the variable named LOWER_BOUND_RESULT.
#define LOWER_BOUND_LOAD(mid) LoadJobTriangleCountScan(mid)
FfxUInt32 LOWER_BOUND_RESULT;
LOWER_BOUND(triangle_offset, total_job_count);
#undef LOWER_BOUND_LOAD
return LOWER_BOUND_RESULT;
}
// Same search as JobLowerBound, but over the values returned by LoadJobIndex:
// returns the last job index whose LoadJobIndex value is <= subvoxel_offset,
// or 0 when there is none.
FfxUInt32 JobLowerBoundBySubvoxel(FfxUInt32 subvoxel_offset, FfxUInt32 total_job_count)
{
// LOWER_BOUND reads elements through LOWER_BOUND_LOAD and writes its answer to
// the variable named LOWER_BOUND_RESULT.
#define LOWER_BOUND_LOAD(mid) LoadJobIndex(mid)
FfxUInt32 LOWER_BOUND_RESULT;
LOWER_BOUND(subvoxel_offset, total_job_count);
#undef LOWER_BOUND_LOAD
return LOWER_BOUND_RESULT;
}
// One group performs global scan for all the other groups
//
// Turns `total_group_count` values read through LoadGlobal(i) into their
// exclusive prefix sums, written back through StoreGlobal(i, value).
//   gid               - thread index within the group
//   total_group_count - number of values to scan
//   group_size        - number of values handled per iteration (forwarded to GroupScanExclusiveAdd)
//   LoadGlobal        - callable: LoadGlobal(index) -> FfxUInt32
//   StoreGlobal       - callable: StoreGlobal(index, value)
// The values are processed in chunks of group_size. gs_ffx_brixelizer_scan_group_id
// is reset to 0 and then carries the running total of all earlier chunks.
// Thread 0 adds the final chunk total without a trailing barrier, so after the
// macro only thread 0 is guaranteed to read the grand total from it.
// The macro contains group barriers and must be expanded by all threads of the group.
#define GROUP_SCAN(gid, total_group_count, group_size, LoadGlobal, StoreGlobal) \
{ \
FFX_GROUP_MEMORY_BARRIER; \
if (gid == FfxUInt32(0)) gs_ffx_brixelizer_scan_group_id = FfxUInt32(0); \
FFX_GROUP_MEMORY_BARRIER; \
for (FfxUInt32 cursor = FfxUInt32(0); cursor < total_group_count; cursor += group_size) { \
FFX_GROUP_MEMORY_BARRIER; \
if (gid + cursor < total_group_count) \
gs_ffx_brixelizer_scan_buffer[gid] = LoadGlobal(gid + cursor); \
else \
gs_ffx_brixelizer_scan_buffer[gid] = FfxUInt32(0); \
FFX_GROUP_MEMORY_BARRIER; \
FfxUInt32 sum = GroupScanExclusiveAdd(gid, group_size); \
\
if (gid + cursor < total_group_count) StoreGlobal(gid + cursor, gs_ffx_brixelizer_scan_buffer[gid] + gs_ffx_brixelizer_scan_group_id); \
\
FFX_GROUP_MEMORY_BARRIER; \
\
if (gid == FfxUInt32(0)) gs_ffx_brixelizer_scan_group_id += sum; \
} \
}
// Used for group scan macros
//
// Binary-searches the values returned by LoadScratchCR1StampScan over all
// CASCADE_RESOLUTION^3 cells and returns the last index whose value is
// <= item_id, or 0 when there is none.
FfxUInt32 StampLowerBound(FfxUInt32 item_id)
{
// LOWER_BOUND reads elements through LOWER_BOUND_LOAD and writes its answer to
// the variable named LOWER_BOUND_RESULT.
#define LOWER_BOUND_LOAD(mid) LoadScratchCR1StampScan(mid)
FfxUInt32 LOWER_BOUND_RESULT;
LOWER_BOUND(item_id, FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION);
#undef LOWER_BOUND_LOAD
return LOWER_BOUND_RESULT;
}
// Appends `brick_id` to the compression list. The slot is obtained by
// incrementing the COMPRESSION_BRICKS scratch counter.
void AddBrickToCompressionList(FfxUInt32 brick_id)
{
    FfxUInt32 list_slot;
    IncrementScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_COMPRESSION_BRICKS, FfxUInt32(1), list_slot);

    StoreScratchBricksCompressionList(list_slot, brick_id);
}
// Tries to allocate one brick for the current build.
//
// Returns a brick id taken from the free list, or FFX_BRIXELIZER_INVALID_ID when
// either the per-bake budget (max_bricks_per_bake) is used up or the free list
// has no entry left. Both counters are incremented even on failure.
FfxUInt32 AllocateBrick()
{
    // `brick_idx` is the number of bricks requested before this call. The
    // counter hands out the pre-increment value, as its other uses as a
    // zero-based list slot show.
    FfxUInt32 brick_idx;
    IncrementScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_NUM_BRICKS_ALLOCATED, FfxUInt32(1), brick_idx);

    // Indices 0 .. max-1 are within budget. `>` would let one extra brick through.
    if (brick_idx >= GetBuildInfo().max_bricks_per_bake) {
        return FFX_BRIXELIZER_INVALID_ID;
    }

    // Claim the next entry of the free list, if there is one.
    FfxUInt32 val;
    IncrementContextCounter(FFX_BRIXELIZER_CONTEXT_COUNTER_BRICK_COUNT, FfxUInt32(1), val);
    if (val >= LoadContextCounter(FFX_BRIXELIZER_CONTEXT_COUNTER_FREE_BRICKS)) {
        return FFX_BRIXELIZER_INVALID_ID;
    }

    FfxUInt32 brick_id = LoadBricksFreeList(val);
    return brick_id;
}
// Records which voxel a brick belongs to. The cascade index is OR-ed into
// `voxel_id` at bit position FFX_BRIXELIZER_CASCADE_ID_SHIFT, and the result is
// stored in the brick-to-voxel map at the brick's index.
void MapBrickToVoxel(FfxUInt32 brick_id, FfxUInt32 voxel_id)
{
    FfxUInt32 tagged_voxel_id = voxel_id | ((GetCascadeInfoIndex()) << FFX_BRIXELIZER_CASCADE_ID_SHIFT);
    StoreBricksVoxelMap(FfxBrixelizerBrickGetIndex(brick_id), tagged_voxel_id);
}
// Looks up the scratch-storage offset recorded for `brick_id` (as written by
// AllocateStorage).
FfxUInt32 BrickGetStorageOffset(FfxUInt32 brick_id)
{
    FfxUInt32 brick_index = FfxBrixelizerBrickGetIndex(brick_id);
    return LoadScratchBricksStorageOffsets(brick_index);
}
// Reserves scratch storage for one brick (8 * 8 * 8 entries of 4 bytes each)
// by advancing the STORAGE_OFFSET counter.
//
// The resulting offset is recorded for the brick and returned. If the
// reservation would run past STORAGE_SIZE, FFX_BRIXELIZER_INVALID_ALLOCATION
// is recorded and returned instead.
FfxUInt32 AllocateStorage(FfxUInt32 brick_id)
{
    const FfxUInt32 brick_dim   = FfxUInt32(8);
    const FfxUInt32 brick_bytes = brick_dim * brick_dim * brick_dim * FfxUInt32(4);

    FfxUInt32 reserved_offset;
    IncrementScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_STORAGE_OFFSET, brick_bytes, reserved_offset);

    FfxUInt32 result = reserved_offset;
    if (reserved_offset + brick_bytes > LoadScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_STORAGE_SIZE)) {
        result = FFX_BRIXELIZER_INVALID_ALLOCATION;
    }

    StoreScratchBricksStorageOffsets(FfxBrixelizerBrickGetIndex(brick_id), result);
    return result;
}
// Appends `brick_id` to the clear list. The slot is obtained by incrementing
// the CLEAR_BRICKS scratch counter.
void AppendClearBrick(FfxUInt32 brick_id)
{
    FfxUInt32 list_slot;
    IncrementScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_CLEAR_BRICKS, 1, list_slot);

    StoreScratchBricksClearList(list_slot, brick_id);
}
// Utilities for the 32-bit scratch brick storage (32-bit so that atomics work)

// Reads the distance stored for brixel `coord` of brick `brick_id` from the
// scratch brick storage.
//
// coord - brixel coordinate inside the 8x8x8 brick. Any component outside
//         [0, 8) makes the function return 1.0 without touching memory.
// Returns the unpacked distance value.
FfxFloat32 LoadBrixelData32(FfxUInt32 brick_id, FfxInt32x3 coord)
{
    FfxInt32 brick_dim = FfxInt32(8);
    if (any(FFX_GREATER_THAN_EQUAL(coord, FFX_BROADCAST_INT32X3(brick_dim))) || any(FFX_LESS_THAN(coord, FFX_BROADCAST_INT32X3(0)))) return FfxFloat32(1.0);

    // Element index = flattened brixel coordinate (3 bits per axis) plus the
    // brick's storage offset, converted from bytes to 32-bit elements.
    FfxUInt32 offset = FfxBrixelizerFlattenPOT(coord, 3);
    offset += BrickGetStorageOffset(brick_id) / FFX_BRIXELIZER_SIZEOF_UINT;

    FfxUInt32 uval = LoadScratchBricksStorage(offset);
    return FfxBrixelizerUnpackDistance(uval);
}
// Applies MinScratchBricksStorage with the already packed value `uval` to
// brixel `coord` of brick `brick_id` in the scratch brick storage.
//
// coord - brixel coordinate inside the 8x8x8 brick. Any component outside
//         [0, 8) makes the call a no-op.
void BrickInterlockedMin32(FfxUInt32 brick_id, FfxInt32x3 coord, FfxUInt32 uval)
{
    FfxInt32 brick_dim = 8;
    if (any(FFX_GREATER_THAN_EQUAL(coord, FFX_BROADCAST_INT32X3(brick_dim))) || any(FFX_LESS_THAN(coord, FFX_BROADCAST_INT32X3(0)))) return;

    // Byte offset of the brixel: flattened coordinate scaled to bytes plus the
    // brick's storage offset. Converted back to an element index for the call.
    FfxUInt32 offset = FfxBrixelizerFlattenPOT(coord, 3) * FFX_BRIXELIZER_SIZEOF_UINT;
    offset += BrickGetStorageOffset(brick_id);
    MinScratchBricksStorage(offset / FFX_BRIXELIZER_SIZEOF_UINT, uval);
}
// Float convenience overload: packs `fval` with FfxBrixelizerPackDistance and
// forwards to the FfxUInt32 overload.
void BrickInterlockedMin32(FfxUInt32 brick_id, FfxInt32x3 coord, FfxFloat32 fval)
{
    FfxUInt32 packed_distance = FfxBrixelizerPackDistance(fval);
    BrickInterlockedMin32(brick_id, coord, packed_distance);
}
// Resets brixel `coord` of brick `brick_id` in the scratch brick storage to the
// packed representation of distance 1.0. Coordinates with any component
// outside [0, 8) are ignored.
void ClearBrixelData32(FfxUInt32 brick_id, FfxInt32x3 coord)
{
    FfxInt32 brick_dim = FfxInt32(8);

    FfxBoolean past_upper_edge  = any(FFX_GREATER_THAN_EQUAL(coord, FFX_BROADCAST_INT32X3(brick_dim)));
    FfxBoolean below_lower_edge = any(FFX_LESS_THAN(coord, FFX_BROADCAST_INT32X3(0)));
    if (past_upper_edge || below_lower_edge) {
        return;
    }

    // Byte offset of the brixel, converted to an element index for the store.
    FfxUInt32 byte_offset = FfxBrixelizerFlattenPOT(coord, 3) * FFX_BRIXELIZER_SIZEOF_UINT;
    byte_offset += BrickGetStorageOffset(brick_id);

    StoreScratchBricksStorage(byte_offset / FFX_BRIXELIZER_SIZEOF_UINT, FfxBrixelizerPackDistance(FfxFloat32(1.0)));
}
// Fills the (x, y, z) indirect-dispatch arguments of four follow-up passes:
//   CLEAR_BRICKS       : x = CLEAR_BRICKS counter * 8
//   EMIT_SDF           : x = subvoxel_count
//   COMPRESS           : x = COMPRESSION_BRICKS counter
//   COMPACT_REFERENCES : x = ceil(max(cell count, min(REFERENCES, MAX_REFERENCES)) / group size)
// y and z are always 1.
void InitializeIndirectArgs(FfxUInt32 subvoxel_count)
{
    // Clear-bricks pass.
    FfxUInt32 clear_brick_count = LoadScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_CLEAR_BRICKS);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_CLEAR_BRICKS_32 + 0, clear_brick_count * 8);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_CLEAR_BRICKS_32 + 1, 1);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_CLEAR_BRICKS_32 + 2, 1);

    // Emit-SDF pass.
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_EMIT_SDF_32 + 0, subvoxel_count);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_EMIT_SDF_32 + 1, 1);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_EMIT_SDF_32 + 2, 1);

    // Compression pass.
    FfxUInt32 compression_brick_count = LoadScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_COMPRESSION_BRICKS);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_COMPRESS_32 + 0, compression_brick_count);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_COMPRESS_32 + 1, 1);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_COMPRESS_32 + 2, 1);

    // Reference-compaction pass: one thread per cascade cell or per stored
    // reference, whichever is larger.
    FfxUInt32 cell_count       = FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION;
    FfxUInt32 stored_refs      = min(LoadScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_REFERENCES), LoadScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_MAX_REFERENCES));
    FfxUInt32 thread_count     = max(cell_count, stored_refs);
    FfxUInt32 compaction_group = (thread_count + FFX_BRIXELIZER_STATIC_CONFIG_COMPACT_REFERENCES_GROUP_SIZE - 1) / FFX_BRIXELIZER_STATIC_CONFIG_COMPACT_REFERENCES_GROUP_SIZE;
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_COMPACT_REFERENCES_32 + 0, compaction_group);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_COMPACT_REFERENCES_32 + 1, 1);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_COMPACT_REFERENCES_32 + 2, 1);
}
// Writes the indirect-dispatch arguments of the voxelize pass:
// x = ceil(num_triangles / voxelizer group size), y = z = 1.
void InitializeJobIndirectArgs(FfxUInt32 num_triangles)
{
    FfxUInt32 voxelize_groups = (num_triangles + FFX_BRIXELIZER_STATIC_CONFIG_VOXELIZER_GROUP_SIZE - 1) / FFX_BRIXELIZER_STATIC_CONFIG_VOXELIZER_GROUP_SIZE;

    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_VOXELIZE_32 + 0, voxelize_groups);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_VOXELIZE_32 + 1, 1);
    StoreIndirectArgs(FFX_BRIXELIZER_INDIRECT_OFFSETS_VOXELIZE_32 + 2, 1);
}
// Prepares the scratch counters and indirect arguments for a new build:
//   1. zeroes every scratch counter,
//   2. seeds STORAGE_SIZE, MAX_TRIANGLES and MAX_REFERENCES from the sizes
//      reported for the corresponding scratch resources,
//   3. zeroes every indirect-argument word.
void FfxBrixelizerClearBuildCounters()
{
    for (FfxUInt32 counter_idx = FfxUInt32(0); counter_idx < FFX_BRIXELIZER_NUM_SCRATCH_COUNTERS; counter_idx++) {
        StoreScratchCounter(counter_idx, FfxUInt32(0));
    }

    // Capacity counters. These are written after the blanket clear above.
    FfxUInt32 brick_storage_size;
    GetScratchBricksStorageDimensions(brick_storage_size);
    StoreScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_STORAGE_SIZE, brick_storage_size);

    FfxUInt32 index_swap_size;
    GetScratchIndexSwapDimensions(index_swap_size);
    StoreScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_MAX_TRIANGLES, index_swap_size);

    FfxUInt32 reference_capacity;
    GetScratchMaxReferences(reference_capacity);
    StoreScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_MAX_REFERENCES, reference_capacity);

    for (FfxUInt32 arg_idx = 0; arg_idx < FFX_BROADCAST_UINT32(FFX_BRIXELIZER_NUM_INDIRECT_OFFSETS) * FFX_BROADCAST_UINT32(FFX_BRIXELIZER_STATIC_CONFIG_INDIRECT_DISPATCH_STRIDE32); arg_idx++) {
        StoreIndirectArgs(arg_idx, FfxUInt32(0));
    }
}
// Per-thread cascade reset: for brick-map entry `tid`, frees the referenced
// brick if the id is valid, then marks the entry as uninitialized.
// Threads with tid beyond the CASCADE_RESOLUTION^3 entries do nothing.
void FfxBrixelizerResetCascade(FfxUInt32 tid)
{
    FfxUInt32 cell_count = FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION;
    if (tid >= cell_count) {
        return;
    }

    FfxUInt32 mapped_brick = LoadCascadeBrickMap(tid);
    if (FfxBrixelizerIsValidID(mapped_brick)) {
        FfxBrixelizerMarkBrickFree(mapped_brick);
    }

    StoreCascadeBrickMap(tid, FFX_BRIXELIZER_UNINITIALIZED_ID);
}
// Per-thread pass over the brick map: entries that are still
// FFX_BRIXELIZER_UNINITIALIZED_ID are replaced with FFX_BRIXELIZER_INVALID_ID.
// All other entries, and threads beyond CASCADE_RESOLUTION^3, are left alone.
void FfxBrixelizerInitializeCascade(FfxUInt32 tid)
{
    FfxUInt32 cell_count = FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION;
    if (tid >= cell_count) {
        return;
    }

    if (LoadCascadeBrickMap(tid) == FFX_BRIXELIZER_UNINITIALIZED_ID) {
        StoreCascadeBrickMap(tid, FFX_BRIXELIZER_INVALID_ID);
    }
}
// Per-thread pass that overwrites brick-map entry `tid` with
// FFX_BRIXELIZER_UNINITIALIZED_ID without freeing any brick it referenced.
// Threads beyond CASCADE_RESOLUTION^3 do nothing.
void FfxBrixelizerMarkCascadeUninitialized(FfxUInt32 tid)
{
    FfxUInt32 cell_count = FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION;
    if (tid >= cell_count) {
        return;
    }

    StoreCascadeBrickMap(tid, FFX_BRIXELIZER_UNINITIALIZED_ID);
}
// Per-thread cascade release: for brick-map entry `tid`, frees the referenced
// brick if the id is valid, then marks the entry as uninitialized.
// Threads with tid beyond the CASCADE_RESOLUTION^3 entries do nothing.
void FfxBrixelizerFreeCascade(FfxUInt32 tid)
{
    FfxUInt32 cell_count = FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION;
    if (tid >= cell_count) {
        return;
    }

    FfxUInt32 mapped_brick = LoadCascadeBrickMap(tid);
    if (FfxBrixelizerIsValidID(mapped_brick)) {
        FfxBrixelizerMarkBrickFree(mapped_brick);
    }

    StoreCascadeBrickMap(tid, FFX_BRIXELIZER_UNINITIALIZED_ID);
}
// Per-thread clipmap scroll update for the voxel whose flat index is `tid`.
//
// Normal build: if the voxel lies in the slab invalidated by the clipmap
// invalidation offset, its brick (when valid) is freed and its wrapped
// brick-map entry is marked uninitialized.
// FFX_BRIXELIZER_DEBUG_FORCE_REBUILD: every voxel's wrapped entry is marked
// uninitialized unconditionally, without freeing the brick.
void FfxBrixelizerScrollCascade(FfxUInt32 tid)
{
    FfxUInt32x3 cell = FfxBrixelizerUnflattenPOT(tid, FFX_BRIXELIZER_CASCADE_DEGREE);
    if (!all(FFX_LESS_THAN(cell, FFX_BROADCAST_UINT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION)))) {
        return;
    }

    FfxInt32x3 voxel_coord = to_int3(cell);

#ifdef FFX_BRIXELIZER_DEBUG_FORCE_REBUILD
    FfxUInt32 wrapped_idx = WrapFlatCoords(FfxBrixelizerFlattenPOT(voxel_coord, FFX_BRIXELIZER_CASCADE_DEGREE));
    FfxUInt32 brick_id    = LoadCascadeBrickMap(wrapped_idx);
    StoreCascadeBrickMap(wrapped_idx, FFX_BRIXELIZER_UNINITIALIZED_ID);
#else // !FFX_BRIXELIZER_DEBUG_FORCE_REBUILD
    // Scrolling clipmap update
    FfxBoolean before_window = any(FFX_LESS_THAN(voxel_coord, -GetCascadeInfoClipmapInvalidationOffset()));
    FfxBoolean after_window  = any(FFX_GREATER_THAN_EQUAL(voxel_coord, FFX_BROADCAST_INT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION) - GetCascadeInfoClipmapInvalidationOffset()));
    if (before_window || after_window) {
        FfxUInt32 wrapped_idx = WrapFlatCoords(FfxBrixelizerFlattenPOT(voxel_coord, FFX_BRIXELIZER_CASCADE_DEGREE));
        FfxUInt32 brick_id    = LoadCascadeBrickMap(wrapped_idx);
        if (FfxBrixelizerIsValidID(brick_id)) {
            FfxBrixelizerMarkBrickFree(brick_id);
        }
        StoreCascadeBrickMap(wrapped_idx, FFX_BRIXELIZER_UNINITIALIZED_ID);
    }
#endif // !FFX_BRIXELIZER_DEBUG_FORCE_REBUILD
}
// Per-thread pass that clears the reference and fail counters of the voxel
// whose flat index is `tid`. Threads whose unflattened coordinate falls outside
// the cascade resolution do nothing.
void FfxBrixelizerClearRefCounters(FfxUInt32 tid)
{
    FfxUInt32x3 cell = FfxBrixelizerUnflattenPOT(tid, FFX_BRIXELIZER_CASCADE_DEGREE);
    if (!all(FFX_LESS_THAN(cell, FFX_BROADCAST_UINT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION)))) {
        return;
    }

    FfxBrixelizerClearRefCounter(FfxBrixelizerFlattenPOT(cell, FFX_BRIXELIZER_CASCADE_DEGREE));
}
// Per-thread pass that zeroes the scratch job counter of job `tid`.
// Threads with tid at or beyond the number of jobs do nothing.
void FfxBrixelizerClearJobCounter(FfxUInt32 tid)
{
    if (tid >= GetBuildInfoNumJobs()) {
        return;
    }

    StoreScratchJobCounter(tid, FfxUInt32(0));
}
// Per-thread pass that invalidates the voxels covered by jobs flagged with
// FFX_BRIXELIZER_JOB_FLAG_INVALIDATE.
//
// gtid / group_id - thread index within the group and group index. Together
//                   they form a flat "subvoxel offset" that is mapped to a job
//                   through JobLowerBoundBySubvoxel.
//
// For the voxel selected inside the job's AABB, the wrapped brick-map entry is
// reset to FFX_BRIXELIZER_UNINITIALIZED_ID. The brick it referenced is released
// only when the entry holds a valid brick id. The map can also contain
// FFX_BRIXELIZER_INVALID_ID (written by FfxBrixelizerInitializeCascade), which
// must not be passed to FfxBrixelizerMarkBrickFree. This matches the guard used
// by the reset, free and scroll passes. Jobs without the INVALIDATE flag are
// ignored here.
void FfxBrixelizerInvalidateJobAreas(FfxUInt32 gtid, FfxUInt32 group_id)
{
    FfxUInt32 thread_subvoxel_offset = group_id * FFX_BRIXELIZER_STATIC_CONFIG_VOXELIZER_GROUP_SIZE + gtid;
    FfxUInt32 job_idx                = JobLowerBoundBySubvoxel(thread_subvoxel_offset, GetBuildInfoNumJobs());
    if (job_idx < GetBuildInfoNumJobs()) {
        FfxBrixelizerBrixelizationJob job = LoadBrixelizationJob(job_idx);
        // Index of this thread's voxel within the job's AABB.
        FfxUInt32  subvoxel_id = thread_subvoxel_offset - LoadJobIndex(job_idx);
        FfxInt32x3 dim         = FfxInt32x3(job.aabbMax - job.aabbMin);
        ffxassert(all(job.aabbMax > FFX_BROADCAST_UINT32X3(0)));
        ffxassert(all(job.aabbMin >= FFX_BROADCAST_UINT32X3(0)));
        ffxassert(all(job.aabbMin < FFX_BROADCAST_UINT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION)));
        ffxassert(all(job.aabbMax > job.aabbMin));
        ffxassert(all(job.aabbMax <= FFX_BROADCAST_UINT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION)));
        if (FFX_HAS_FLAG(job.flags, FFX_BRIXELIZER_JOB_FLAG_INVALIDATE)) {
            if (subvoxel_id < dim.x * dim.y * dim.z) {
                FfxUInt32x3 subvoxel_coord     = FfxBrixelizerUnflatten(subvoxel_id, dim);
                FfxUInt32x3 global_voxel_coord = subvoxel_coord + job.aabbMin;
                ffxassert(all(global_voxel_coord >= FFX_BROADCAST_UINT32X3(0)) && all(global_voxel_coord < FFX_BROADCAST_UINT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION)));
                FfxUInt32 brick_id = LoadCascadeBrickMap(WrapFlatCoords(FfxBrixelizerFlattenPOT(global_voxel_coord, FFX_BRIXELIZER_CASCADE_DEGREE)));
                if (brick_id != FFX_BRIXELIZER_UNINITIALIZED_ID) {
                    // Only ids that refer to a real brick may be released.
                    if (FfxBrixelizerIsValidID(brick_id)) {
                        FfxBrixelizerMarkBrickFree(brick_id);
                    }
                    StoreCascadeBrickMap(WrapFlatCoords(FfxBrixelizerFlattenPOT(global_voxel_coord, FFX_BRIXELIZER_CASCADE_DEGREE)), FFX_BRIXELIZER_UNINITIALIZED_ID);
                }
            }
        }
    }
}
// Per-thread coarse culling pass.
//
// Maps the thread's flat subvoxel offset to a job and to one voxel of that
// job's AABB. If the job is not an INVALIDATE job and the voxel can be built
// (always treated as true with FFX_BRIXELIZER_DEBUG_FORCE_REBUILD), the job's
// scratch counter is incremented by one.
void FfxBrixelizerCoarseCulling(FfxUInt32 gtid, FfxUInt32 group_id)
{
    FfxUInt32 thread_subvoxel_offset = group_id * FFX_BRIXELIZER_STATIC_CONFIG_VOXELIZER_GROUP_SIZE + gtid;
    FfxUInt32 job_idx                = JobLowerBoundBySubvoxel(thread_subvoxel_offset, GetBuildInfoNumJobs());
    if (!(job_idx < GetBuildInfoNumJobs())) {
        return;
    }

    FfxBrixelizerBrixelizationJob job = LoadBrixelizationJob(job_idx);
    FfxUInt32  subvoxel_id = thread_subvoxel_offset - LoadJobIndex(job_idx);
    FfxInt32x3 dim         = FfxInt32x3(job.aabbMax - job.aabbMin);
    ffxassert(all(job.aabbMax > FFX_BROADCAST_UINT32X3(0)));
    ffxassert(all(job.aabbMin >= FFX_BROADCAST_UINT32X3(0)));
    ffxassert(all(job.aabbMin < FFX_BROADCAST_UINT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION)));
    ffxassert(all(job.aabbMax > job.aabbMin));
    ffxassert(all(job.aabbMax <= FFX_BROADCAST_UINT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION)));

    // Invalidation jobs are handled by FfxBrixelizerInvalidateJobAreas.
    if (FFX_HAS_FLAG(job.flags, FFX_BRIXELIZER_JOB_FLAG_INVALIDATE)) {
        return;
    }

    // Threads past the end of the job's AABB have nothing to test.
    if (!(subvoxel_id < dim.x * dim.y * dim.z)) {
        return;
    }

    FfxUInt32x3 subvoxel_coord     = FfxBrixelizerUnflatten(subvoxel_id, dim);
    FfxUInt32x3 global_voxel_coord = subvoxel_coord + job.aabbMin;
    ffxassert(all(global_voxel_coord >= FFX_BROADCAST_UINT32X3(0)) && all(global_voxel_coord < FFX_BROADCAST_UINT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION)));

    FfxUInt32  voxel_idx     = FfxBrixelizerFlattenPOT(global_voxel_coord, FFX_BRIXELIZER_CASCADE_DEGREE);
    FfxBoolean needs_rebuild = CanBuildThisVoxel(voxel_idx);
#ifdef FFX_BRIXELIZER_DEBUG_FORCE_REBUILD
    needs_rebuild = true;
#endif // FFX_BRIXELIZER_DEBUG_FORCE_REBUILD

    if (needs_rebuild) {
        IncrementScratchJobCounter(job_idx, 1);
    }
}
// Builds the exclusive scan of per-job triangle counts.
//
// job_idx  - job handled by this thread
// gtid     - thread index within the group
// group_id - index of the group
//
// Step 1: each group scans the triangle counts of its own jobs (jobs whose
//         scratch job counter is 0 contribute 0) and stores the per-job result
//         plus the group total.
// Step 2: groups take a ticket from the GROUP_INDEX scratch counter. The group
//         that draws the last ticket scans the group totals, resets the ticket
//         counter and writes the voxelize indirect arguments from the grand total.
void FfxBrixelizerScanJobs(FfxUInt32 job_idx, FfxUInt32 gtid, FfxUInt32 group_id)
{
FfxBoolean is_touched = job_idx < GetBuildInfoNumJobs() && LoadScratchJobCounter(job_idx) > 0;
// NOTE(review): the job and instance are loaded even when job_idx >= number of jobs.
// The result is only used when is_touched is true.
FfxBrixelizerBrixelizationJob job = LoadBrixelizationJob(job_idx);
FfxBrixelizerInstanceInfo instance_info = LoadInstanceInfo(job.instanceIdx);
ffxassert((job.flags & FFX_BRIXELIZER_JOB_FLAG_INVALIDATE) == 0);
// Scan triangle counts so that later we can map thread_id -> job_idx
{
gs_ffx_brixelizer_scan_buffer[gtid] = is_touched ? instance_info.triangleCount : 0;
FFX_GROUP_MEMORY_BARRIER;
// `sum` is the group total and is only meaningful on gtid == 0.
FfxUInt32 sum = GroupScanExclusiveAdd(gtid, FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE);
if (job_idx < GetBuildInfoNumJobs()) StoreScratchJobCountersScan(job_idx, gs_ffx_brixelizer_scan_buffer[gtid]);
if (gtid == 0) // The first thread stores the sum
StoreGlobalJobTriangleCounterScan(group_id, sum);
}
// Take a ticket: the value returned by the counter increment is shared with the whole group.
if (gtid == 0) IncrementScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_GROUP_INDEX, 1, gs_ffx_brixelizer_scan_group_id);
FfxUInt32 total_group_count = (GetBuildInfoNumJobs() + FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE - 1) / FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE;
FFX_GROUP_MEMORY_BARRIER; // Wait for gs_ffx_brixelizer_scan_group_id
if (total_group_count - 1 == gs_ffx_brixelizer_scan_group_id) { // the last group does the rest of the scans
GROUP_SCAN(gtid,
total_group_count,
FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE,
LoadGlobalJobTriangleCounterScan,
StoreGlobalJobTriangleCounterScan);
if (gtid == 0) {
// Reset the ticket counter. GROUP_SCAN left the grand total in
// gs_ffx_brixelizer_scan_group_id for thread 0.
StoreScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_GROUP_INDEX, FfxUInt32(0));
InitializeJobIndirectArgs(gs_ffx_brixelizer_scan_group_id);
}
}
}
void FfxBrixelizerVoxelize(FfxUInt32 gtid, FfxUInt32 group_id)
{
if (gtid == 0) {
gs_ffx_brixelizer_voxelizer_item_counter = 0;
gs_ffx_brixelizer_voxelizer_ref_counter = 0;
gs_ffx_brixelizer_voxelizer_ref_offset = 0;
gs_ffx_brixelizer_triangle_offset_global = 0;
gs_ffx_brixelizer_triangle_offset = 0;
}
FFX_GROUP_MEMORY_BARRIER; // Wait for initialization
FfxUInt32 thread_triangle_offset = group_id * FFX_BRIXELIZER_STATIC_CONFIG_VOXELIZER_GROUP_SIZE + gtid;
FfxUInt32 job_idx = JobLowerBound(thread_triangle_offset, GetBuildInfoNumJobs());
FfxBoolean is_touched = job_idx < GetBuildInfoNumJobs() && LoadScratchJobCounter(job_idx) > 0;
FfxUInt32 triangle_index = thread_triangle_offset - LoadJobTriangleCountScan(job_idx);
FfxBrixelizerTriangle tri;
FfxBrixelizerCRVoxelTriangleBounds tvbounds;
FfxBoolean collides = false;
if (is_touched) {
FfxBrixelizerBrixelizationJob job = LoadBrixelizationJob(job_idx);
FfxBrixelizerInstanceInfo instance_info = LoadInstanceInfo(job.instanceIdx);
if (triangle_index < instance_info.triangleCount) {
collides = GetTriangleBounds(job.instanceIdx, job_idx, ffxBrixelizerInstanceInfoGetMeshInfo(instance_info), triangle_index, /* out */ tri,
/* out */ tvbounds);
}
}
ffxassert(!collides || all(tvbounds.ubound_max <= FFX_BROADCAST_UINT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION)));
ffxassert(!collides || all(tvbounds.ubound_min < FFX_BROADCAST_UINT32X3(FFX_BRIXELIZER_CASCADE_RESOLUTION)));
FfxUInt32 item_offset;
if (collides) {
FFX_ATOMIC_ADD_RETURN(gs_ffx_brixelizer_voxelizer_item_counter, 1, item_offset);
FfxBrixelizerCRItem item;
item.bounds_min = tvbounds.ubound_min;
item.bounds_max = tvbounds.ubound_max;
FfxBrixelizerCRStoreItem(FfxInt32(item_offset), item);
FfxUInt32x3 dim = tvbounds.ubound_max - tvbounds.ubound_min;
#if defined(FFX_BRIXELIZER_VOXELIZER_2D)
FfxUInt32 num_refs = dim.x * dim.z;
#else // !defined(FFX_BRIXELIZER_VOXELIZER_2D)
FfxUInt32 num_refs = dim.x * dim.y * dim.z;
#endif // !defined(FFX_BRIXELIZER_VOXELIZER_2D)
FFX_ATOMIC_ADD(gs_ffx_brixelizer_voxelizer_ref_counter, num_refs);
gs_ffx_brixelizer_voxelizer_items_ref_count[item_offset] = num_refs;
gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[item_offset] = 0;
}
FFX_GROUP_MEMORY_BARRIER; // Wait for gs_ffx_brixelizer_voxelizer_ref_counter
if (gs_ffx_brixelizer_voxelizer_item_counter == FfxUInt32(0)) return; // scalar
#if defined(FFX_BRIXELIZER_VOXELIZER_CHECK_BRICKS)
{
FfxUInt32 item_id = FfxUInt32(0);
FfxUInt32 ref_scan = FfxUInt32(0);
for (FfxUInt32 ref_id = gtid; ref_id < gs_ffx_brixelizer_voxelizer_ref_counter; ref_id += FFX_BRIXELIZER_STATIC_CONFIG_VOXELIZER_GROUP_SIZE) {
while (ref_id >= gs_ffx_brixelizer_voxelizer_items_ref_count[item_id] + ref_scan) {
ref_scan += gs_ffx_brixelizer_voxelizer_items_ref_count[item_id];
item_id++;
}
if (ref_id >= gs_ffx_brixelizer_voxelizer_ref_counter) break;
FfxUInt32 local_ref_id = ref_id - ref_scan;
FfxBrixelizerCRItem item = FfxBrixelizerCRLoadItem(item_id);
FfxUInt32x3 dim = item.bounds_max - item.bounds_min;
# if defined(FFX_BRIXELIZER_VOXELIZER_2D)
FfxUInt32x2 local_voxel_coord = FfxBrixelizerUnflatten(local_ref_id, dim.xz);
for (FfxUInt32 y = FfxUInt32(0); y < dim.y; y++) {
FfxUInt32x3 global_voxel_coord = FfxUInt32x3(local_voxel_coord.x, y, local_voxel_coord.y) + item.bounds_min;
FfxUInt32 voxel_idx = FfxBrixelizerFlattenPOT(global_voxel_coord, FFX_BRIXELIZER_CASCADE_DEGREE);
if (CanBuildThisVoxel(voxel_idx)) {
FFX_ATOMIC_OR(gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[item_id], FfxUInt32(1));
}
}
# else // !defined(FFX_BRIXELIZER_VOXELIZER_2D)
FfxUInt32x3 local_voxel_coord = FfxBrixelizerUnflatten(local_ref_id, dim);
FfxUInt32x3 global_voxel_coord = local_voxel_coord + item.bounds_min;
FfxUInt32 voxel_idx = FfxBrixelizerFlattenPOT(global_voxel_coord, FFX_BRIXELIZER_CASCADE_DEGREE);
if (CanBuildThisVoxel(voxel_idx)) {
FFX_ATOMIC_OR(gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[item_id], FfxUInt32(1));
}
# endif // !defined(FFX_BRIXELIZER_VOXELIZER_2D)
}
}
#endif // defined(FFX_BRIXELIZER_VOXELIZER_CHECK_BRICKS)
FFX_GROUP_MEMORY_BARRIER;
const FfxUInt32 MAX_STORAGE = LoadScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_MAX_TRIANGLES);
FfxFloat32x3 bounds = tvbounds.bound_max - tvbounds.bound_min;
FfxFloat32 aabb_max_dim = ffxMax3(bounds.x, bounds.y, bounds.z);
FfxFloat32 voxel_size_ratio = aabb_max_dim / GetCascadeInfoVoxelSize();
FfxBoolean small_triangle = voxel_size_ratio < FfxFloat32(1.0e-1); // 1/10th of a brick is small enough for the point approximation
FfxUInt32 triangle_size = FfxUInt32(0);
if (small_triangle)
triangle_size = FfxUInt32(12);
else
triangle_size = FfxUInt32(24);
#if defined(FFX_BRIXELIZER_VOXELIZER_CHECK_BRICKS)
FfxUInt32 hit_cnt = gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[item_offset];
#else // ! defined(FFX_BRIXELIZER_VOXELIZER_CHECK_BRICKS)
FfxUInt32 hit_cnt = 1;
#endif // ! defined(FFX_BRIXELIZER_VOXELIZER_CHECK_BRICKS)
FfxUInt32 local_triangle_swap_offset;
if (collides && hit_cnt != FfxUInt32(0)) FFX_ATOMIC_ADD_RETURN(gs_ffx_brixelizer_triangle_offset, triangle_size, local_triangle_swap_offset);
FFX_GROUP_MEMORY_BARRIER;
if (gtid == FfxUInt32(0)) {
IncrementScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_TRIANGLES, /* in */ gs_ffx_brixelizer_triangle_offset,
/* out */ gs_ffx_brixelizer_triangle_offset_global);
// Check that there's enough swap space for the triangles
gs_ffx_brixelizer_voxelizer_has_space = gs_ffx_brixelizer_triangle_offset_global + gs_ffx_brixelizer_triangle_offset <= MAX_STORAGE;
}
FFX_GROUP_MEMORY_BARRIER;
// Swap only triangles that have enough resources to get voxelized
if (collides && gs_ffx_brixelizer_voxelizer_has_space) {
if (hit_cnt != FfxUInt32(0)) {
FfxUInt32 triangle_id_swap_offset = local_triangle_swap_offset + gs_ffx_brixelizer_triangle_offset_global;
if (small_triangle) {
FfxBrixelizerStoreTriangleCenter(triangle_id_swap_offset, tri);
triangle_id_swap_offset |= FFX_BRIXELIZER_TRIANGLE_SMALL_FLAG;
} else
FfxBrixelizerStoreTrianglePartial(triangle_id_swap_offset, tri);
gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[item_offset] = triangle_id_swap_offset;
} else {
gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[item_offset] = FfxUInt32(-1);
}
}
FFX_GROUP_MEMORY_BARRIER;
{
FfxUInt32 item_id = FfxUInt32(0);
FfxUInt32 ref_scan = FfxUInt32(0);
for (FfxUInt32 ref_id = gtid; ref_id < gs_ffx_brixelizer_voxelizer_ref_counter; ref_id += FFX_BRIXELIZER_STATIC_CONFIG_VOXELIZER_GROUP_SIZE) {
while (ref_id >= gs_ffx_brixelizer_voxelizer_items_ref_count[item_id] + ref_scan) {
ref_scan += gs_ffx_brixelizer_voxelizer_items_ref_count[item_id];
item_id++;
}
if (ref_id >= gs_ffx_brixelizer_voxelizer_ref_counter) break;
if (gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[item_id] == FfxUInt32(-1)) continue; // Skip if culled
FfxUInt32 local_ref_id = ref_id - ref_scan;
FfxBrixelizerCRItem item = FfxBrixelizerCRLoadItem(FfxInt32(item_id));
FfxUInt32x3 dim = item.bounds_max - item.bounds_min;
FfxUInt32 num_cells = dim.x * dim.y * dim.z;
#if defined(FFX_BRIXELIZER_VOXELIZER_2D)
FfxUInt32x2 local_voxel_coord = FfxBrixelizerUnflatten(local_ref_id, dim.xz);
// Only cull if the number of cells is more than N
FfxBoolean check_range = !FfxBrixelizerTriangleIsSmall(gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[item_id]) && num_cells > FfxUInt32(1);
FfxBrixelizerTrianglePartial tri;
FfxFloat32x3 e0;
FfxFloat32x3 e1;
FfxFloat32x3 e2;
FfxFloat32x3 gn;
if (check_range) {
FfxBrixelizerLoadTrianglePartial(FfxBrixelizerTriangleIDGetOffset(gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[item_id]), /* out */ tri);
e0 = tri.wp1.xyz - tri.wp0.xyz;
e1 = tri.wp2.xyz - tri.wp1.xyz;
e2 = tri.wp0.xyz - tri.wp2.xyz;
gn = normalize(cross(e2, e0));
}
for (FfxUInt32 y = FfxUInt32(0); y < dim.y; y++) {
FfxUInt32x3 global_voxel_coord = FfxUInt32x3(local_voxel_coord.x, y, local_voxel_coord.y) + item.bounds_min;
FfxUInt32 voxel_idx = FfxBrixelizerFlattenPOT(global_voxel_coord, FFX_BRIXELIZER_CASCADE_DEGREE);
if (check_range) {
FfxFloat32x3 voxel_offset = GetCascadeInfoVoxelSize() * (FfxFloat32x3(global_voxel_coord) + FFX_BROADCAST_FLOAT32X3(0.5));
FfxFloat32 dist = abs(dot(gn, (voxel_offset - tri.wp0)));
if (dist > GetCascadeInfoVoxelSize() * FfxFloat32(2.0)) continue;
dist = CalculateDistanceToTriangle(voxel_offset, tri.wp0, tri.wp1, tri.wp2);
if (dist > GetCascadeInfoVoxelSize() * FfxFloat32(2.0)) continue;
}
if (!gs_ffx_brixelizer_voxelizer_has_space) {
MarkFailed(voxel_idx);
} else {
AddReferenceOrMarkVoxelFailed(voxel_idx, gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[item_id]);
}
}
#else // !defined(FFX_BRIXELIZER_VOXELIZER_2D)
FfxUInt32x3 local_voxel_coord = FfxBrixelizerUnflatten(local_ref_id, dim);
FfxUInt32x3 global_voxel_coord = local_voxel_coord + item.bounds_min;
FfxUInt32 voxel_idx = FfxBrixelizerFlattenPOT(global_voxel_coord, FFX_BRIXELIZER_CASCADE_DEGREE);
if (!gs_ffx_brixelizer_voxelizer_has_space) {
MarkFailed(voxel_idx);
} else {
AddReferenceOrMarkVoxelFailed(voxel_idx, gs_ffx_brixelizer_voxelizer_items_triangle_id_swap_offsets[item_id]);
}
#endif // !defined(FFX_BRIXELIZER_VOXELIZER_2D)
}
}
}
// Per-voxel pass that (1) decides whether the voxel keeps, gains or loses a brick for this build,
// (2) writes the group-local scans of the reference counts and of the stamp counts, and
// (3) lets the last group to finish scan the per-group sums and set up the indirect arguments.
//   voxel_flat_id - flat index of the voxel handled by this thread.
//   gtid          - index of the thread inside its group (slot in gs_ffx_brixelizer_scan_buffer).
//   group_id      - index of the group; selects the slot for the per-group sums.
void FfxBrixelizerScanReferences(FfxUInt32 voxel_flat_id, FfxUInt32 gtid, FfxUInt32 group_id)
{
    FfxUInt32  total_cell_count  = (FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION);
    FfxUInt32  total_group_count = (total_cell_count + FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE - FfxUInt32(1)) / FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE;
    FfxBoolean is_first_thread   = gtid == FfxUInt32(0);
    FfxUInt32  brick_map_idx     = WrapFlatCoords(voxel_flat_id);

    // Reference count gathered by the voxelizer (zero for out-of-range voxels)
    FfxUInt32 ref_count = FfxUInt32(0);
    if (voxel_flat_id < total_cell_count) {
        ref_count = LoadScratchCR1RefCounter(voxel_flat_id);
    }

    // A voxel flagged as failed by the voxelizer is not built this time
    FfxBoolean voxelizer_failed = LoadScratchVoxelAllocationFailCounter(voxel_flat_id) != FfxUInt32(0);
    if (voxelizer_failed) {
        ref_count = FfxUInt32(0);
        FfxBrixelizerClearRefCounter(voxel_flat_id);
    }

    FfxUInt32 brick_id = LoadCascadeBrickMap(brick_map_idx);

    // Brick allocation/deallocation logic
    if (ref_count == FfxUInt32(0)) {
        // Nothing references the voxel: an uninitialized entry becomes invalid, unless the voxelizer
        // failed, in which case the entry is left untouched (original note: "Restart next frame")
        if (!voxelizer_failed && brick_id == FFX_BRIXELIZER_UNINITIALIZED_ID) {
            brick_id = FFX_BRIXELIZER_INVALID_ID;
            StoreCascadeBrickMap(brick_map_idx, FFX_BRIXELIZER_INVALID_ID);
            FfxBrixelizerClearRefCounter(voxel_flat_id);
        }
    } else if (brick_id != FFX_BRIXELIZER_UNINITIALIZED_ID) {
        // Already have an assigned brick: no need to rebuild
        ref_count = FfxUInt32(0);
        FfxBrixelizerClearRefCounter(voxel_flat_id);
    } else {
        // Allocate a new brick
        brick_id = AllocateBrick();
        if (!FfxBrixelizerIsInvalidID(brick_id)) {
            ffxassert(FfxBrixelizerIsValidID(brick_id));
            FfxUInt32 storage_alloc_offset = AllocateStorage(brick_id);
            if (storage_alloc_offset != FFX_BRIXELIZER_INVALID_ALLOCATION) {
                AppendClearBrick(brick_id);
                AddBrickToCompressionList(brick_id);
                StoreCascadeBrickMap(brick_map_idx, brick_id);
            } else {
                // No storage for the brick: give the brick back and leave the voxel uninitialized
                ref_count = FfxUInt32(0);
                FfxBrixelizerClearRefCounter(voxel_flat_id);
                FfxBrixelizerMarkBrickFree(brick_id);
                StoreCascadeBrickMap(brick_map_idx, FFX_BRIXELIZER_UNINITIALIZED_ID);
                brick_id = FFX_BRIXELIZER_INVALID_ID;
            }
        } else {
            // No brick available: leave the voxel uninitialized
            ref_count = FfxUInt32(0);
            FfxBrixelizerClearRefCounter(voxel_flat_id);
            StoreCascadeBrickMap(brick_map_idx, FFX_BRIXELIZER_UNINITIALIZED_ID);
        }
    }

    if (FfxBrixelizerIsValidID(brick_id) && brick_id != FFX_BRIXELIZER_UNINITIALIZED_ID) {
        MapBrickToVoxel(brick_id, voxel_flat_id); // Update mapping
    }

    ////////////////////////////////////////////////////
    // Scan the ref counts for sorting
    gs_ffx_brixelizer_scan_buffer[gtid] = ref_count;
    FFX_GROUP_MEMORY_BARRIER;
    FfxUInt32 group_ref_sum = GroupScanExclusiveAdd(gtid, FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE);
    StoreScratchCR1RefCounterScan(voxel_flat_id, gs_ffx_brixelizer_scan_buffer[gtid]);
    if (is_first_thread) { // The first thread stores the sum
        StoreVoxelReferenceGroupSum(group_id, group_ref_sum);
    }

    ////////////////////////////////////////////////////
    // Scan the stamp counts for work distribution
    FfxBoolean emits_stamps = (ref_count != FfxUInt32(0)) && FfxBrixelizerIsValidID(brick_id);
    FfxUInt32  stamp_count  = FfxUInt32(0);
    if (emits_stamps) {
        // One stamp per started batch of FFX_BRIXELIZER_STATIC_CONFIG_EMIT_SDF_REFS_PER_GROUP references
        stamp_count = ((ref_count + FFX_BRIXELIZER_STATIC_CONFIG_EMIT_SDF_REFS_PER_GROUP - FfxUInt32(1)) / FFX_BRIXELIZER_STATIC_CONFIG_EMIT_SDF_REFS_PER_GROUP);
    }
    gs_ffx_brixelizer_scan_buffer[gtid] = stamp_count;
    FFX_GROUP_MEMORY_BARRIER;
    FfxUInt32 group_stamp_sum = GroupScanExclusiveAdd(gtid, FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE);
    StoreScratchCR1StampScan(voxel_flat_id, gs_ffx_brixelizer_scan_buffer[gtid]);
    if (is_first_thread) { // The first thread stores the sum
        StoreStampGroupSum(group_id, group_stamp_sum);
    }

    // Take a ticket so that the group finishing last can be identified
    if (is_first_thread) {
        IncrementScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_GROUP_INDEX, FfxUInt32(1), gs_ffx_brixelizer_scan_group_id);
    }
    FFX_GROUP_MEMORY_BARRIER; // Wait for gs_ffx_brixelizer_scan_group_id

    if (gs_ffx_brixelizer_scan_group_id == total_group_count - FfxUInt32(1)) { // the last group does the rest of the scans
        GROUP_SCAN(gtid,
                   total_group_count,
                   FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE,
                   LoadVoxelReferenceGroupSum,
                   StoreVoxelReferenceGroupSum);
        GROUP_SCAN(gtid,
                   total_group_count,
                   FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE,
                   LoadStampGroupSum,
                   StoreStampGroupSum);
        if (is_first_thread) {
            // Reset the ticket counter for the next use, then publish the indirect arguments
            StoreScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_GROUP_INDEX, FfxUInt32(0));
            InitializeIndirectArgs(gs_ffx_brixelizer_scan_group_id);
        }
    }
}
// Second pass after the reference scan. Each thread does up to two independent jobs:
//   1. For tid below the number of stored references: moves the triangle id of reference `tid`
//      into the compacted list, at the voxel's reference offset plus the reference's local index.
//      References of voxels whose ref counter is zero are dropped.
//   2. For tid below the number of cascade cells: turns the group-local stamp scan of voxel `tid`
//      into a global one by adding the scanned sum of its scan group, stored back in place.
//   tid - flat thread index; used both as a reference index and as a voxel index.
void FfxBrixelizerCompactReferences(FfxUInt32 tid)
{
    FfxUInt32 total_cell_count = (FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION * FFX_BRIXELIZER_CASCADE_RESOLUTION);
    // The reference counter is clamped to the capacity counter before being used as a bound
    FfxUInt32 total_references = min(LoadScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_REFERENCES), LoadScratchCounter(FFX_BRIXELIZER_SCRATCH_COUNTER_MAX_REFERENCES));

    if (tid < total_references) {
        FfxBrixelizerTriangleReference ref       = LoadScratchCR1Reference(tid);
        FfxUInt32                      voxel_id  = ref.voxel_idx;
        FfxUInt32                      ref_count = LoadScratchCR1RefCounter(voxel_id);
        if (ref_count > 0) {
            ffxassert(ref.local_ref_idx < ref_count);
            FfxUInt32 offset = GetReferenceOffset(voxel_id) + ref.local_ref_idx;
            StoreScratchCR1CompactedReferences(offset, ref.triangle_id);
        }
    }

    if (tid < total_cell_count) {
        FfxUInt32 group_scan_id    = tid / FFX_BRIXELIZER_STATIC_CONFIG_SCAN_REFERENCES_GROUP_SIZE;
        FfxUInt32 group_scan_value = LoadStampGroupSum(group_scan_id);
        FfxUInt32 local_scan_value = LoadScratchCR1StampScan(tid);
        FfxUInt32 stamp_offset     = group_scan_value + local_scan_value;
        StoreScratchCR1StampScan(tid, stamp_offset);
    }
}
// Writes the distance contribution of one batch of compacted triangle references into the brick of
// the voxel that owns the given stamp. Every brixel reached by a triangle receives a scaled, clamped
// squared distance through BrickInterlockedMin32.
//   ref_id_offset   - first reference of the batch handled by this thread; the reference loop then
//                     strides by FFX_BRIXELIZER_STATIC_CONFIG_EMIT_SDF_GROUP_SIZE.
//   global_stamp_id - global stamp index; StampLowerBound maps it to the owning voxel.
void FfxBrixelizerEmitSDF(FfxUInt32 ref_id_offset, FfxUInt32 global_stamp_id)
{
FfxUInt32 voxel_id = StampLowerBound(global_stamp_id);
FfxUInt32 brick_id = LoadCascadeBrickMap(WrapFlatCoords(voxel_id));
FfxUInt32 ref_count = LoadScratchCR1RefCounter(voxel_id);
// Nothing to emit without a brick or without references
if (FfxBrixelizerIsInvalidID(brick_id) || ref_count == FfxUInt32(0)) return;
// Number of batches of FFX_BRIXELIZER_STATIC_CONFIG_EMIT_SDF_REFS_PER_GROUP references for this voxel
FfxUInt32 refbatch_count = (ref_count + FFX_BRIXELIZER_STATIC_CONFIG_EMIT_SDF_REFS_PER_GROUP - 1) / FFX_BRIXELIZER_STATIC_CONFIG_EMIT_SDF_REFS_PER_GROUP;
FfxUInt32 global_stamp_offset = LoadScratchCR1StampScan(voxel_id);
// Index of this stamp relative to the first stamp of the voxel, mapped onto a batch
FfxUInt32 voxel_stamp_id = global_stamp_id - global_stamp_offset;
FfxUInt32 refbatch_id = voxel_stamp_id % refbatch_count;
FfxUInt32 voxel_ref_offset = GetReferenceOffset(voxel_id);
FfxUInt32 refbatch_item_offset = refbatch_id * FFX_BRIXELIZER_STATIC_CONFIG_EMIT_SDF_REFS_PER_GROUP;
// Reference range [start_ref_id, end_ref_id) of the batch; the end is clamped to the voxel's ref count
FfxUInt32 start_ref_id = voxel_ref_offset + refbatch_item_offset;
FfxUInt32 end_ref_id = voxel_ref_offset + min(refbatch_item_offset + FFX_BRIXELIZER_STATIC_CONFIG_EMIT_SDF_REFS_PER_GROUP, ref_count);
// Brixel range written by this function: [0, 8) on every axis
FfxUInt32x3 stamp_min = FfxUInt32x3(0, 0, 0);
FfxUInt32x3 stamp_max = stamp_min + FFX_BROADCAST_UINT32X3(8);
const FfxFloat32 brick_width = FfxFloat32(8.0);
FfxUInt32x3 voxel_coord = FfxBrixelizerUnflattenPOT(voxel_id, FFX_BRIXELIZER_CASCADE_DEGREE);
// A brixel is 1/(brick_width - 1) of the voxel size; the brick origin is pulled back by half a brixel
FfxFloat32 brixel_size = FfxFloat32(GetCascadeInfoVoxelSize() / (brick_width - FfxFloat32(FfxFloat32(1.0))));
FfxFloat32 half_brixel_size = FfxFloat32(brixel_size / FfxFloat32(FfxFloat32(2.0)));
FfxFloat32x3 brick_min = to_float3(voxel_coord) * FfxFloat32(GetCascadeInfoVoxelSize()) - FFX_BROADCAST_FLOAT32X3(FfxFloat32(half_brixel_size));
// NOTE(review): brick_max is not referenced again in this function
FfxFloat32x3 brick_max = brick_min + FFX_BROADCAST_FLOAT32X3(brixel_size * brick_width);
// Placeholder value (presumably the all-ones bit pattern reinterpreted as float); it is reassigned before every use below
FfxFloat32 clamped_dist = ffxAsFloat(FfxUInt32(-1));
for (FfxUInt32 ref_id = start_ref_id + ref_id_offset; ref_id < end_ref_id; ref_id += FFX_BRIXELIZER_STATIC_CONFIG_EMIT_SDF_GROUP_SIZE) {
FfxUInt32 triangle_id = LoadScratchCR1CompactedReferences(ref_id);
if (FfxBrixelizerTriangleIsSmall(triangle_id)) {
// Small triangle: only its stored center is used, and only the brixel containing it is written
FfxFloat32x3 center = FfxBrixelizerLoadTriangleCenter(FfxBrixelizerTriangleIDGetOffset(triangle_id));
// Center in brixel units relative to the brick origin, and the brixel it falls into (clamped to 0..7)
FfxFloat32x3 COORD = (center - brick_min) / (brixel_size);
FfxFloat32x3 VOXEL = clamp(floor(COORD), FFX_BROADCAST_FLOAT32X3(0.0), FFX_BROADCAST_FLOAT32X3(7.0));
FfxFloat32x3 p = VOXEL + FFX_BROADCAST_FLOAT32X3(FfxFloat32(0.5));
// dot2 is presumably the squared length; the result is scaled by 1/16 and clamped to [0, 1]
FfxFloat32 dist = dot2(p - COORD) * FfxFloat32(0.25) * FfxFloat32(0.25);
clamped_dist = FfxFloat32(1.0) * clamp(dist, FfxFloat32(0.0), FfxFloat32(1.0));
BrickInterlockedMin32(brick_id, FfxInt32x3(VOXEL), clamped_dist);
} else {
FfxBrixelizerTrianglePartial tri;
FfxBrixelizerLoadTrianglePartial(FfxBrixelizerTriangleIDGetOffset(triangle_id), /* out */ tri);
// Triangle vertices in brixel units relative to the brick origin
FfxFloat32x3 TRIANGLE_VERTEX_0 = (tri.wp0 - brick_min) / (brixel_size);
FfxFloat32x3 TRIANGLE_VERTEX_1 = (tri.wp1 - brick_min) / (brixel_size);
FfxFloat32x3 TRIANGLE_VERTEX_2 = (tri.wp2 - brick_min) / (brixel_size);
const FfxFloat32 TRIANGLE_OFFSET = FfxFloat32(FfxFloat32(0.0));
FfxFloat32x3 TRIANGLE_MIN = min(TRIANGLE_VERTEX_0, min(TRIANGLE_VERTEX_1, TRIANGLE_VERTEX_2));
FfxFloat32x3 TRIANGLE_MAX = max(TRIANGLE_VERTEX_0, max(TRIANGLE_VERTEX_1, TRIANGLE_VERTEX_2));
FfxFloat32x3 TRIANGLE_AABB_MIN;
FfxFloat32x3 TRIANGLE_AABB_MAX;
// Integer-aligned bounds: negative coordinates are pushed down by one extra unit before flooring,
// and the upper bound gets +1 so that [MIN, MAX) is a half-open range
TRIANGLE_AABB_MIN.x = (floor(TRIANGLE_MIN.x < FfxFloat32(0.0) ? TRIANGLE_MIN.x - FfxFloat32(1.0) : TRIANGLE_MIN.x)) - TRIANGLE_OFFSET;
TRIANGLE_AABB_MIN.y = (floor(TRIANGLE_MIN.y < FfxFloat32(0.0) ? TRIANGLE_MIN.y - FfxFloat32(1.0) : TRIANGLE_MIN.y)) - TRIANGLE_OFFSET;
TRIANGLE_AABB_MIN.z = (floor(TRIANGLE_MIN.z < FfxFloat32(0.0) ? TRIANGLE_MIN.z - FfxFloat32(1.0) : TRIANGLE_MIN.z)) - TRIANGLE_OFFSET;
TRIANGLE_AABB_MAX.x = (floor(TRIANGLE_MAX.x < FfxFloat32(0.0) ? TRIANGLE_MAX.x - FfxFloat32(1.0) : TRIANGLE_MAX.x)) + (FfxFloat32(1.0) + TRIANGLE_OFFSET);
TRIANGLE_AABB_MAX.y = (floor(TRIANGLE_MAX.y < FfxFloat32(0.0) ? TRIANGLE_MAX.y - FfxFloat32(1.0) : TRIANGLE_MAX.y)) + (FfxFloat32(1.0) + TRIANGLE_OFFSET);
TRIANGLE_AABB_MAX.z = (floor(TRIANGLE_MAX.z < FfxFloat32(0.0) ? TRIANGLE_MAX.z - FfxFloat32(1.0) : TRIANGLE_MAX.z)) + (FfxFloat32(1.0) + TRIANGLE_OFFSET);
// Restrict the bounds to the brixel range of the brick
TRIANGLE_AABB_MIN = max(TRIANGLE_AABB_MIN, FfxFloat32x3(stamp_min));
TRIANGLE_AABB_MAX = min(TRIANGLE_AABB_MAX, FfxFloat32x3(stamp_max));
// Skipped only when min equals max on all three axes
if (all(FFX_EQUAL(TRIANGLE_AABB_MIN, TRIANGLE_AABB_MAX))) continue;
// Per-triangle terms handed to CalculateDistanceToTriangleSquared, computed once per triangle
FfxFloat32x3 a = TRIANGLE_VERTEX_0;
FfxFloat32x3 b = TRIANGLE_VERTEX_1;
FfxFloat32x3 c = TRIANGLE_VERTEX_2;
FfxFloat32x3 ba = TRIANGLE_VERTEX_1 - TRIANGLE_VERTEX_0;
FfxFloat32x3 ac = TRIANGLE_VERTEX_0 - TRIANGLE_VERTEX_2;
FfxFloat32x3 cb = TRIANGLE_VERTEX_2 - TRIANGLE_VERTEX_1;
FfxFloat32x3 nor = cross(ba, ac);
FfxFloat32x3 cross_ba_nor = cross(ba, nor);
FfxFloat32x3 cross_cb_nor = cross(cb, nor);
FfxFloat32x3 cross_ac_nor = cross(ac, nor);
FfxFloat32 dot2_ba = dot2(ba);
FfxFloat32 dot2_cb = dot2(cb);
FfxFloat32 dot2_ac = dot2(ac);
FfxFloat32 dot2_nor = dot2(nor);
// The thin-layer variant is selected here unconditionally; it is only bypassed when
// FFX_BRIXELIZER_TRIANGLE_VOXELIZER_ONE_ROW is defined (see the #if chain below)
#define FFX_BRIXELIZER_TRIANGLE_VOXELIZER_THIN_LAYER
// Body run for every brixel accepted by the loops below: evaluates the distance at the brixel
// center (VOXEL + 0.5), scales it by 1/16, clamps it to [0, 1] and merges it into the brick
#define FFX_BRIXELIZER_TRIANGLE_VOXELIZER_BODY \
{ \
FfxFloat32x3 p = VOXEL + FFX_BROADCAST_FLOAT32X3(FfxFloat32(0.5)); \
FfxFloat32 dist = CalculateDistanceToTriangleSquared(ba, p - a, c - b, p - b, ac, p - c, nor, cross_ba_nor, cross_cb_nor, cross_ac_nor, dot2_ba, dot2_cb, dot2_ac, dot2_nor) * \
FfxFloat32(0.25) * FfxFloat32(0.25); \
clamped_dist = FfxFloat32(1.0) * clamp(dist, FfxFloat32(0.0), FfxFloat32(1.0)); \
BrickInterlockedMin32(brick_id, FfxInt32x3(VOXEL), clamped_dist); \
}
// for FFX_BRIXELIZER_TRIANGLE_VOXELIZER_THIN_LAYER:
// Basically a simple 2D loop over the 2D AABB of a triangle selected by VX_CRD_2 and VX_CRD_0
// Then a 1D loop for the depth layer of that triangle for VX_CRD_1
// Sensitive to the selection of the major axis
// else:
// Iterates the 3D AABB of a triangle
// The macros make it easy to change the major plane
// For thin layer there's a major plane for the outer 2D iteration and then the one axis left for the 1D iteration
#if !defined(VX_CRD_0)
# define VX_CRD_0 x
#endif // !defined(VX_CRD_0)
#if !defined(VX_CRD_1)
# define VX_CRD_1 y
#endif // !defined(VX_CRD_1)
#if !defined(VX_CRD_2)
# define VX_CRD_2 z
#endif // !defined(VX_CRD_2)
{ // Everything is in grid space
// 3 2d edge normals with offsets for edge functions for 3 projections xy, yz, xz
brixelizerreal3 de_xy;
brixelizerreal3x2 ne_xy;
brixelizerreal3 de_xz;
brixelizerreal3x2 ne_xz;
brixelizerreal3 de_yz;
brixelizerreal3x2 ne_yz;
brixelizerreal3 gn; // triangle plane normal
// Need to offset the edge functions by the grid alignment
FfxBrixelizerGet2DEdges( //
/* out */ de_xy, //
/* out */ ne_xy, //
/* out */ de_xz, //
/* out */ ne_xz, //
/* out */ de_yz, //
/* out */ ne_yz, //
/* out */ gn, //
brixelizerreal3(TRIANGLE_VERTEX_0), //
brixelizerreal3(TRIANGLE_VERTEX_1), //
brixelizerreal3(TRIANGLE_VERTEX_2), //
TRIANGLE_OFFSET, //
false
);
brixelizerreal3 VOXEL;
// Some duplication but with the other ordering, only one is used though
// (the aliases let the token-pasted names below resolve for either axis order)
brixelizerreal3 de_yx = de_xy;
brixelizerreal3x2 ne_yx = ne_xy;
brixelizerreal3 de_zx = de_xz;
brixelizerreal3x2 ne_zx = ne_xz;
brixelizerreal3 de_zy = de_yz;
brixelizerreal3x2 ne_zy = ne_yz;
// Token pasting builds the swizzles (e.g. VX_CRD_02 -> xz) and the matching de_*/ne_* names
// NOTE(review): these helper macros are not #undef'd within the visible part of the file
#define _CONCAT(a, b) a##b
#define CONCAT(a, b) _CONCAT(a, b)
#define VX_CRD_02 CONCAT(VX_CRD_0, VX_CRD_2)
#define VX_CRD_01 CONCAT(VX_CRD_0, VX_CRD_1)
#define VX_CRD_12 CONCAT(VX_CRD_1, VX_CRD_2)
#define VX_DE_01 CONCAT(de_, VX_CRD_01)
#define VX_DE_02 CONCAT(de_, VX_CRD_02)
#define VX_DE_12 CONCAT(de_, VX_CRD_12)
#define VX_NE_02 CONCAT(ne_, VX_CRD_02)
#define VX_NE_01 CONCAT(ne_, VX_CRD_01)
#define VX_NE_12 CONCAT(ne_, VX_CRD_12)
// Just one row of the voxelizer along the 1st
#if defined(FFX_BRIXELIZER_TRIANGLE_VOXELIZER_ONE_ROW)
// gn = normalize(cross(e2, e0));
if (gn.VX_CRD_1 < brixelizerreal(0.0)) {
gn = -gn; // make the normal point in the +VX_CRD_1 direction
}
// Reciprocal of the normal's VX_CRD_1 component, with the component floored at 1e-4
brixelizerreal ny_inv = brixelizerreal(brixelizerreal(1.0) / max(gn.VX_CRD_1, brixelizerreal(1.0e-4)));
brixelizerreal d_tri = -dot(gn, brixelizerreal3(TRIANGLE_VERTEX_0));
// 2 plane equation offsets with grid alignment
brixelizerreal d_tri_proj_min = -FfxBrixelizerOffsetByMax(d_tri, gn.VX_CRD_01, TRIANGLE_OFFSET) * ny_inv;
brixelizerreal d_tri_proj_max = -FfxBrixelizerOffsetByMin(d_tri, gn.VX_CRD_01, TRIANGLE_OFFSET) * ny_inv;
// No outer loops in this variant: only the column at the AABB minimum of VX_CRD_2/VX_CRD_0 is visited
VOXEL.VX_CRD_2 = TRIANGLE_AABB_MIN.VX_CRD_2;
{
VOXEL.VX_CRD_0 = TRIANGLE_AABB_MIN.VX_CRD_0;
{
if (FfxBrixelizerEvalEdge(VOXEL.VX_CRD_02, VX_DE_02, VX_NE_02)) // 2D triangle test
{
// Now figure out the 3rd coordinate range [min, max]
// By doing range analysis on the evaluation of the plane equation on the 4 corners of the row
brixelizerreal y00 = -((VOXEL.VX_CRD_0 + brixelizerreal(0.0)) * gn.VX_CRD_0 + (VOXEL.VX_CRD_2 + brixelizerreal(0.0)) * gn.VX_CRD_2) * ny_inv;
brixelizerreal y01 = -((VOXEL.VX_CRD_0 + brixelizerreal(0.0)) * gn.VX_CRD_0 + (VOXEL.VX_CRD_2 + brixelizerreal(1.0)) * gn.VX_CRD_2) * ny_inv;
brixelizerreal y10 = -((VOXEL.VX_CRD_0 + brixelizerreal(1.0)) * gn.VX_CRD_0 + (VOXEL.VX_CRD_2 + brixelizerreal(0.0)) * gn.VX_CRD_2) * ny_inv;
brixelizerreal y11 = -((VOXEL.VX_CRD_0 + brixelizerreal(1.0)) * gn.VX_CRD_0 + (VOXEL.VX_CRD_2 + brixelizerreal(1.0)) * gn.VX_CRD_2) * ny_inv;
brixelizerreal min_y = floor(min(y00, min(y01, min(y10, y11))) - d_tri * ny_inv);
min_y = max(TRIANGLE_AABB_MIN.VX_CRD_1, min_y);
brixelizerreal max_y = floor(max(y00, max(y01, max(y10, y11))) - d_tri * ny_inv) + brixelizerreal(1.0);
max_y = min(TRIANGLE_AABB_MAX.VX_CRD_1, max_y);
// brixelizerreal min_y = floor(min(y00, min(y01, min(y10, y11 + d_tri_proj_min))) + d_tri_proj_min);
// min_y = max(TRIANGLE_AABB_MIN.VX_CRD_1, min_y);
// brixelizerreal max_y = floor(max(y00, max(y01, max(y10, y11))) + d_tri_proj_max) + brixelizerreal(1.0);
// max_y = min(TRIANGLE_AABB_MAX.VX_CRD_1, max_y);
for (VOXEL.VX_CRD_1 = min_y; VOXEL.VX_CRD_1 < max_y; VOXEL.VX_CRD_1 += brixelizerreal(1.0)) {
// for (VOXEL.VX_CRD_1 = TRIANGLE_AABB_MIN.VX_CRD_1; VOXEL.VX_CRD_1 < TRIANGLE_AABB_MAX.VX_CRD_1; VOXEL.VX_CRD_1 += brixelizerreal(1.0)) {
// if (FfxBrixelizerEvalEdge(VOXEL.VX_CRD_01, VX_DE_01, VX_NE_01) && // the rest of the 2D triangle tests
// FfxBrixelizerEvalEdge(VOXEL.VX_CRD_12, VX_DE_12, VX_NE_12)) //
{ FFX_BRIXELIZER_TRIANGLE_VOXELIZER_BODY; }
}
}
}
}
// Outer loop iterates on the 2d bounding box
#elif defined(FFX_BRIXELIZER_TRIANGLE_VOXELIZER_THIN_LAYER)
// gn = normalize(cross(e2, e0));
if (gn.VX_CRD_1 < brixelizerreal(0.0)) {
gn = -gn; // make the normal point in the +VX_CRD_1 direction
}
// Reciprocal of the normal's VX_CRD_1 component, with the component floored at 1e-4
brixelizerreal ny_inv = brixelizerreal(brixelizerreal(1.0) / max(gn.VX_CRD_1, brixelizerreal(1.0e-4)));
brixelizerreal d_tri = -dot(gn, brixelizerreal3(TRIANGLE_VERTEX_0));
brixelizerreal d_tri_proj_min = -FfxBrixelizerOffsetByMax(d_tri, gn.VX_CRD_01, TRIANGLE_OFFSET) * ny_inv;
brixelizerreal d_tri_proj_max = -FfxBrixelizerOffsetByMin(d_tri, gn.VX_CRD_01, TRIANGLE_OFFSET) * ny_inv;
// For thin layer we iterate N^2 and project a point on two planes to find the lower/upper bound for the 3rd inner loop
for (VOXEL.VX_CRD_2 = TRIANGLE_AABB_MIN.VX_CRD_2; VOXEL.VX_CRD_2 < TRIANGLE_AABB_MAX.VX_CRD_2; VOXEL.VX_CRD_2 += brixelizerreal(1.0)) {
for (VOXEL.VX_CRD_0 = TRIANGLE_AABB_MIN.VX_CRD_0; VOXEL.VX_CRD_0 < TRIANGLE_AABB_MAX.VX_CRD_0; VOXEL.VX_CRD_0 += brixelizerreal(1.0)) {
if (FfxBrixelizerEvalEdge(VOXEL.VX_CRD_02, VX_DE_02, VX_NE_02)) // 2D triangle test
{
// Now figure out the 3rd coordinate range [min, max]
// By doing range analysis on the evaluation of 4 corners of the grid
brixelizerreal y00 = -((VOXEL.VX_CRD_0 + brixelizerreal(0.0)) * gn.VX_CRD_0 + (VOXEL.VX_CRD_2 + brixelizerreal(0.0)) * gn.VX_CRD_2) * ny_inv;
brixelizerreal y01 = -((VOXEL.VX_CRD_0 + brixelizerreal(0.0)) * gn.VX_CRD_0 + (VOXEL.VX_CRD_2 + brixelizerreal(1.0)) * gn.VX_CRD_2) * ny_inv;
brixelizerreal y10 = -((VOXEL.VX_CRD_0 + brixelizerreal(1.0)) * gn.VX_CRD_0 + (VOXEL.VX_CRD_2 + brixelizerreal(0.0)) * gn.VX_CRD_2) * ny_inv;
brixelizerreal y11 = -((VOXEL.VX_CRD_0 + brixelizerreal(1.0)) * gn.VX_CRD_0 + (VOXEL.VX_CRD_2 + brixelizerreal(1.0)) * gn.VX_CRD_2) * ny_inv;
brixelizerreal min_y = floor(min(y00 + d_tri_proj_min, min(y01 + d_tri_proj_min, min(y10 + d_tri_proj_min, y11 + d_tri_proj_min))));
min_y = max(TRIANGLE_AABB_MIN.VX_CRD_1, min_y);
brixelizerreal max_y = floor(max(y00 + d_tri_proj_max, max(y01 + d_tri_proj_max, max(y10 + d_tri_proj_max, y11 + d_tri_proj_max)))) + brixelizerreal(1.0);
max_y = min(TRIANGLE_AABB_MAX.VX_CRD_1, max_y);
for (VOXEL.VX_CRD_1 = min_y; VOXEL.VX_CRD_1 < max_y; VOXEL.VX_CRD_1 += brixelizerreal(1.0)) {
if (FfxBrixelizerEvalEdge(VOXEL.VX_CRD_01, VX_DE_01, VX_NE_01) && // the rest of the 2D triangle tests
FfxBrixelizerEvalEdge(VOXEL.VX_CRD_12, VX_DE_12, VX_NE_12)) //
{
FFX_BRIXELIZER_TRIANGLE_VOXELIZER_BODY;
}
}
}
}
}
#else // !FFX_BRIXELIZER_TRIANGLE_VOXELIZER_THIN_LAYER
// For thick layer we iterate N^3
for (VOXEL.VX_CRD_2 = TRIANGLE_AABB_MIN.VX_CRD_2; VOXEL.VX_CRD_2 < TRIANGLE_AABB_MAX.VX_CRD_2; VOXEL.VX_CRD_2 += brixelizerreal(1.0)) {
for (VOXEL.VX_CRD_0 = TRIANGLE_AABB_MIN.VX_CRD_0; VOXEL.VX_CRD_0 < TRIANGLE_AABB_MAX.VX_CRD_0; VOXEL.VX_CRD_0 += brixelizerreal(1.0)) {
if (FfxBrixelizerEvalEdge(VOXEL.VX_CRD_02, VX_DE_02, VX_NE_02)) //
{
for (VOXEL.VX_CRD_1 = TRIANGLE_AABB_MIN.VX_CRD_1; VOXEL.VX_CRD_1 < TRIANGLE_AABB_MAX.VX_CRD_1; VOXEL.VX_CRD_1 += brixelizerreal(1.0)) {
if (FfxBrixelizerEvalEdge(VOXEL.VX_CRD_01, VX_DE_01, VX_NE_01) && //
FfxBrixelizerEvalEdge(VOXEL.VX_CRD_12, VX_DE_12, VX_NE_12)) //
{
FFX_BRIXELIZER_TRIANGLE_VOXELIZER_BODY;
}
}
}
}
}
#endif
}
// The body macro and the variant selector are scoped to this triangle branch
#undef FFX_BRIXELIZER_TRIANGLE_VOXELIZER_BODY
#undef FFX_BRIXELIZER_TRIANGLE_VOXELIZER_THIN_LAYER
}
}
}
FFX_GROUPSHARED FfxUInt32x3 lds_aabb_tree_min;
FFX_GROUPSHARED FfxUInt32x3 lds_aabb_tree_max;
// Builds one level of the cascade AABB tree; the level is selected by GetBuildInfoTreeIteration():
//   0 - bottom level: each group merges the brick AABBs of a 4x4x4 block of voxels into one packed
//       uint node (min in the low bits, max shifted left by 16).
//   1 - mid level: each group merges 4x4x4 bottom nodes into a world-space float3 min/max pair.
//   2 - top level: a single node merged from the 4x4x4 mid-level nodes.
// The group-shared min/max are reset by the thread whose gid is (0, 0, 0), reduced with atomics by
// all threads, and written out by that same thread after a barrier.
//   gid      - coordinate of the thread within its group.
//   group_id - coordinate of the group.
void FfxBrixelizerBuildTreeAABB(FfxUInt32x3 gid, FfxUInt32x3 group_id)
{
    FfxUInt32  tree_level     = GetBuildInfoTreeIteration();
    FfxBoolean is_lead_thread = all(FFX_EQUAL(gid, FFX_BROADCAST_UINT32X3(0)));

    if (is_lead_thread) {
        lds_aabb_tree_min = FFX_BROADCAST_UINT32X3(FfxUInt32(-1));
        lds_aabb_tree_max = FFX_BROADCAST_UINT32X3(FfxUInt32(0));
    }
    FFX_GROUP_MEMORY_BARRIER;

    if (tree_level == FfxUInt32(0)) { // bottom level: 16^3 nodes of 4^3 voxels each
        FfxUInt32x3 voxel_coord = group_id.xyz * FfxUInt32(4) + gid.xyz;
        FfxUInt32   brick_id    = LoadCascadeBrickMap(FfxBrixelizerFlattenPOT(WrapCoords(voxel_coord), FFX_BRIXELIZER_CASCADE_DEGREE));

        // Both valid and uninitialized bricks contribute; only invalid entries are ignored
        if (brick_id != FFX_BRIXELIZER_INVALID_ID) {
            // Default pack (used for uninitialized bricks): min field 0, max field all ones
            FfxUInt32 packed_aabb = 0x3FE00;
            if (brick_id != FFX_BRIXELIZER_UNINITIALIZED_ID) {
                packed_aabb = LoadBricksAABB(FfxBrixelizerBrickGetIndex(brick_id));
            }
            FfxUInt32   field_mask = (FfxUInt32(1) << FfxUInt32(9)) - FfxUInt32(1);
            FfxUInt32x3 node_lo    = gid.xyz * FfxUInt32(8) + FfxBrixelizerUnflattenPOT(packed_aabb & field_mask, FfxUInt32(3));
            FfxUInt32x3 node_hi    = gid.xyz * FfxUInt32(8) + FfxBrixelizerUnflattenPOT((packed_aabb >> FfxUInt32(9)) & field_mask, FfxUInt32(3));
            FFX_ATOMIC_MIN(lds_aabb_tree_min.x, node_lo.x);
            FFX_ATOMIC_MIN(lds_aabb_tree_min.y, node_lo.y);
            FFX_ATOMIC_MIN(lds_aabb_tree_min.z, node_lo.z);
            FFX_ATOMIC_MAX(lds_aabb_tree_max.x, node_hi.x);
            FFX_ATOMIC_MAX(lds_aabb_tree_max.y, node_hi.y);
            FFX_ATOMIC_MAX(lds_aabb_tree_max.z, node_hi.z);
        }
        FFX_GROUP_MEMORY_BARRIER;

        if (is_lead_thread) {
            // Five bits per axis for each corner, max corner in the upper half-word
            FfxUInt32 packed_min = FfxBrixelizerFlattenPOT(lds_aabb_tree_min.xyz & FfxUInt32(0x1f), FfxUInt32(5));
            FfxUInt32 packed_max = FfxBrixelizerFlattenPOT(lds_aabb_tree_max.xyz & FfxUInt32(0x1f), FfxUInt32(5));
            StoreCascadeAABBTreeUInt(FfxBrixelizerFlattenPOT(group_id.xyz, FfxUInt32(4)), min_pack_or_max(packed_min, packed_max));
        }
    } else if (tree_level == FfxUInt32(1)) { // mid level: 4^3 nodes of 4^3 bottom nodes each
        FfxUInt32x3 child_coord = gid.xyz * FfxUInt32(4) + group_id.xyz * FfxUInt32(16);
        FfxUInt32   child_node  = LoadCascadeAABBTreeUInt(FfxBrixelizerFlattenPOT(child_coord / FfxUInt32(4), FfxUInt32(4)));

        if (child_node != FFX_BRIXELIZER_INVALID_BOTTOM_AABB_NODE) {
            FfxUInt32x3 node_lo = child_coord * FfxUInt32(8) + FfxBrixelizerUnflattenPOT(child_node & FfxUInt32(0x7fff), FfxUInt32(5));
            FfxUInt32x3 node_hi = child_coord * FfxUInt32(8) + FfxBrixelizerUnflattenPOT((child_node >> FfxUInt32(16)) & FfxUInt32(0x7fff), FfxUInt32(5));
            FFX_ATOMIC_MIN(lds_aabb_tree_min.x, node_lo.x);
            FFX_ATOMIC_MIN(lds_aabb_tree_min.y, node_lo.y);
            FFX_ATOMIC_MIN(lds_aabb_tree_min.z, node_lo.z);
            FFX_ATOMIC_MAX(lds_aabb_tree_max.x, node_hi.x);
            FFX_ATOMIC_MAX(lds_aabb_tree_max.y, node_hi.y);
            FFX_ATOMIC_MAX(lds_aabb_tree_max.z, node_hi.z);
        }
        FFX_GROUP_MEMORY_BARRIER;

        if (is_lead_thread) {
            // Each float node takes six floats (min then max), placed after the 16^3 bottom nodes
            FfxUInt32 node_base = FfxUInt32(16 * 16 * 16) + 6 * FfxBrixelizerFlattenPOT(group_id, 2);
            // An empty node is stored as min == max == 0
            FfxFloat32x3 world_aabb_min = FfxFloat32x3(0.0, 0.0, 0.0);
            FfxFloat32x3 world_aabb_max = FfxFloat32x3(0.0, 0.0, 0.0);
            if (lds_aabb_tree_min.x != FfxUInt32(-1)) { // TODO(Dihara): Check this!
                world_aabb_min = FfxFloat32x3(lds_aabb_tree_min.xyz) * GetCascadeInfoVoxelSize() / FfxFloat32(8.0) + GetCascadeInfoGridMin();
                world_aabb_max = FfxFloat32x3(lds_aabb_tree_max.xyz + FFX_BROADCAST_UINT32X3(1)) * GetCascadeInfoVoxelSize() / FfxFloat32(8.0) + GetCascadeInfoGridMin();
            }
            StoreCascadeAABBTreeFloat3(node_base, world_aabb_min);
            StoreCascadeAABBTreeFloat3(node_base + FfxUInt32(3), world_aabb_max);
        }
    } else if (tree_level == FfxUInt32(2)) { // top level: one node of 4^3 mid-level nodes
        FfxUInt32    child_base     = FfxUInt32(16 * 16 * 16) + FfxUInt32(6) * FfxBrixelizerFlattenPOT(gid.xyz, FfxUInt32(2));
        FfxFloat32x3 stamp_aabb_min = LoadCascadeAABBTreeFloat3(child_base);
        FfxFloat32x3 stamp_aabb_max = LoadCascadeAABBTreeFloat3(child_base + FfxUInt32(3));

        // A child whose min.x and max.x are bitwise equal is treated as empty
        if (ffxAsUInt32(stamp_aabb_min.x) != ffxAsUInt32(stamp_aabb_max.x)) {
            FfxUInt32x3 node_lo = FfxUInt32x3(max(FFX_BROADCAST_FLOAT32X3(0.0), stamp_aabb_min - GetCascadeInfoGridMin()) / (GetCascadeInfoVoxelSize() / FfxFloat32(8.0)));
            FfxUInt32x3 node_hi = FfxUInt32x3(max(FFX_BROADCAST_FLOAT32X3(0.0), stamp_aabb_max - GetCascadeInfoGridMin()) / (GetCascadeInfoVoxelSize() / FfxFloat32(8.0)));
            FFX_ATOMIC_MIN(lds_aabb_tree_min.x, node_lo.x);
            FFX_ATOMIC_MIN(lds_aabb_tree_min.y, node_lo.y);
            FFX_ATOMIC_MIN(lds_aabb_tree_min.z, node_lo.z);
            FFX_ATOMIC_MAX(lds_aabb_tree_max.x, node_hi.x);
            FFX_ATOMIC_MAX(lds_aabb_tree_max.y, node_hi.y);
            FFX_ATOMIC_MAX(lds_aabb_tree_max.z, node_hi.z);
        }
        FFX_GROUP_MEMORY_BARRIER;

        if (is_lead_thread) {
            // The root follows the 4^3 mid-level float nodes
            FfxUInt32 root_base = FfxUInt32(16 * 16 * 16) + FfxUInt32(4 * 4 * 4) * 6;
            // An empty root is stored as min == max == 0
            FfxFloat32x3 world_aabb_min = FfxFloat32x3(0.0, 0.0, 0.0);
            FfxFloat32x3 world_aabb_max = FfxFloat32x3(0.0, 0.0, 0.0);
            if (lds_aabb_tree_min.x != FfxUInt32(-1)) { // TODO(Dihara): Check this!
                world_aabb_min = FfxFloat32x3(lds_aabb_tree_min.xyz) * GetCascadeInfoVoxelSize() / FfxFloat32(8.0) + GetCascadeInfoGridMin();
                world_aabb_max = FfxFloat32x3(lds_aabb_tree_max.xyz) * GetCascadeInfoVoxelSize() / FfxFloat32(8.0) + GetCascadeInfoGridMin();
            }
            StoreCascadeAABBTreeFloat3(root_base, world_aabb_min);
            StoreCascadeAABBTreeFloat3(root_base + FfxUInt32(3), world_aabb_max);
        }
    }
}
// Clears one sub-block ("stamp") of a brick that is queued in the scratch clear list.
//
// The low three bits of group_id select one of eight stamps of the brick and the
// remaining bits index the clear list, so eight consecutive groups cover one brick.
//
// gtid      Thread index inside the group; unflattened with 2 bits per axis.
// group_id  Group index; encodes both the clear-list slot and the stamp.
void FfxBrixelizerClearBrickStorage(FfxUInt32 gtid, FfxUInt32 group_id)
{
    FfxUInt32 brick_offset = group_id >> FfxUInt32(3);
    FfxUInt32 stamp_id     = group_id & FfxUInt32(7);
    FfxUInt32 brick_id     = LoadScratchBricksClearList(brick_offset);

    // Thread offset within the stamp plus the stamp origin (stamp coordinate scaled by 4).
    FfxUInt32x3 local_coord = FfxBrixelizerUnflattenPOT(gtid, FfxUInt32(2)) + FfxBrixelizerUnflattenPOT(stamp_id, FfxUInt32(1)) * FfxUInt32(4);
    ClearBrixelData32(brick_id, FfxInt32x3(local_coord));
}
// Per-group bounds of the brick handled by FfxBrixelizerCompressBrick, in brick-local
// brixel coordinates. Thread 0 resets them (min = 0xffffffff, max = 0) and the other
// threads grow them with atomic min/max; min.x still equal to 0xffffffff afterwards
// means no brixel contributed.
FFX_GROUPSHARED FfxUInt32x3 lds_brick_aabb_min;
FFX_GROUPSHARED FfxUInt32x3 lds_brick_aabb_max;
// Finalizes one brick taken from the scratch compression list.
//
// The group gathers the bounding box of all brixels whose stored value is below 1/8
// into lds_brick_aabb_min / lds_brick_aabb_max. When no brixel qualifies, the brick is
// marked free and its brick-map entry is set to FFX_BRIXELIZER_INVALID_ID. Otherwise
// the packed bounding box is stored for the brick and every thread whose value has a
// magnitude of at most 0.9999 writes a re-encoded value into the SDF atlas.
//
// gtid              Thread index inside the group; unflattened with 3 bits per axis.
// brick_map_offset  Index into the scratch compression list.
void FfxBrixelizerCompressBrick(FfxUInt32 gtid, FfxUInt32 brick_map_offset)
{
    FfxUInt32 brick_id  = LoadScratchBricksCompressionList(brick_map_offset);
    FfxUInt32 voxel_id  = FfxBrixelizerLoadBrickVoxelID(brick_id);
    FfxUInt32 voxel_idx = FfxBrixelizerVoxelGetIndex(voxel_id);

    // Thread 0 resets the shared bounds; an all-ones minimum doubles as the
    // "nothing found" marker tested further down.
    if (gtid == FfxUInt32(0)) {
        lds_brick_aabb_max = FFX_BROADCAST_UINT32X3(0);
        lds_brick_aabb_min = FFX_BROADCAST_UINT32X3(0xffffffffu);
    }

    FfxUInt32x3 local_coord = FfxBrixelizerUnflattenPOT(gtid, FfxUInt32(3));
    FfxFloat32  val         = LoadBrixelData32(brick_id, FfxInt32x3(local_coord));

    // Barrier between the reset above and the atomic updates below.
    FFX_GROUP_MEMORY_BARRIER;

    // Only brixels below the 1/8 threshold contribute to the bounding box.
    if (val < FfxFloat32(1.0 / 8.0)) {
        FFX_ATOMIC_MAX(lds_brick_aabb_max.x, local_coord.x);
        FFX_ATOMIC_MAX(lds_brick_aabb_max.y, local_coord.y);
        FFX_ATOMIC_MAX(lds_brick_aabb_max.z, local_coord.z);
        FFX_ATOMIC_MIN(lds_brick_aabb_min.x, local_coord.x);
        FFX_ATOMIC_MIN(lds_brick_aabb_min.y, local_coord.y);
        FFX_ATOMIC_MIN(lds_brick_aabb_min.z, local_coord.z);
    }

    // Barrier between the atomic updates above and the reads of the final bounds below.
    FFX_GROUP_MEMORY_BARRIER;

    if (gtid == FfxUInt32(0)) {
        if (lds_brick_aabb_min.x == FfxUInt32(0xffffffffu)) { // free brick
            FfxBrixelizerMarkBrickFree(brick_id);
            StoreCascadeBrickMap(WrapFlatCoords(voxel_idx), FFX_BRIXELIZER_INVALID_ID);
        } else {
            // Each corner is clamped to 7 per axis and flattened with 3 bits per axis;
            // the max corner is placed 9 bits above the min corner.
            FfxUInt32 pack0 = FfxBrixelizerFlattenPOT(min(FFX_BROADCAST_UINT32X3(7), lds_brick_aabb_min), FfxUInt32(3));
            FfxUInt32 pack1 = FfxBrixelizerFlattenPOT(min(FFX_BROADCAST_UINT32X3(7), lds_brick_aabb_max), FfxUInt32(3));
            StoreBricksAABB(FfxBrixelizerBrickGetIndex(brick_id), pack0 | (pack1 << FfxUInt32(9)));
        }
    }

    // Atlas write-back happens only for bricks that were not freed above.
    if (lds_brick_aabb_min.x != FfxUInt32(0xffffffffu)) {
        // Values with magnitude above 0.9999 are left untouched in the atlas.
        if (abs(val) > FfxFloat32(0.9999)) return;

        // Re-encode: signed square root of the stored value, scaled by 4 / (8 - 1).
        val = (FfxBrixelizerGetSign(val) * sqrt(abs(val)) * FfxFloat32(4.0)) / FfxFloat32(8 - 1);
        ffxassert(val >= -FfxFloat32(1.0) && val <= FfxFloat32(1.0));

        // Negative results are clamped to 0 on store.
        StoreSDFAtlas(FfxBrixelizerGetSDFAtlasOffset(brick_id) + local_coord, clamp(val, FfxFloat32(0.0), FfxFloat32(1.0)));
    }
}
#endif // ifndef FFX_BRIXELIZER_CASCADE_OPS_H