diff --git a/Makefile b/Makefile index a9732534..f69ac8c8 100644 --- a/Makefile +++ b/Makefile @@ -59,7 +59,7 @@ ifneq (,$(findstring win64,$(ARCH))) REQ_DEPS += $(RENDERER) json:nlohmann png zlib openal ogg freetype curl luajit reactphysics meshoptimizer xatlas simd ctti gltf # ncurses openvr draco discord bullet ultralight-ux FLAGS += DEPS += -lgdi32 - LINKS += -Wl,-subsystem,windows + LINKS += #-Wl,-subsystem,windows else ifneq (,$(findstring dreamcast,$(ARCH))) REQ_DEPS += simd opengl gldc json:nlohmann reactphysics png zlib ctti # lua ogg openal aldc gltf freetype bullet meshoptimizer draco luajit ultralight-ux ncurses curl openvr discord endif @@ -304,7 +304,7 @@ $(TARGET): $(OBJS) endif %.spv: %.glsl - $(GLSLC) -std=450 -o $@ $< + $(GLSLC) --target-env=vulkan1.2 -o $@ $< $(SPV_OPTIMIZER) --preserve-bindings --preserve-spec-constants -O $@ -o $@ clean: diff --git a/bin/data/config.json b/bin/data/config.json index 5b43007a..cd943be8 100644 --- a/bin/data/config.json +++ b/bin/data/config.json @@ -1,17 +1,17 @@ { "engine": { "scenes": { - "start": "McDonalds", + "start": "SS2", "meshes": { "interleaved": false }, "matrix": { "reverseInfinite": true }, "lights": { "enabled": true, "useLightmaps": true, - "max": 16 + "max": 32 }, "shadows": { "enabled": true, "update": 2, - "max": 8, + "max": 16, "samples": 1 }, "textures": { @@ -48,7 +48,7 @@ "ext": { "vulkan": { "validation": { - "enabled": false, + "enabled": true, "filters": [ "MessageID = 0x4dae5635", // UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout (false positive for cubemaps) "MessageID = 0x609a13b", // UNASSIGNED-CoreValidation-Shader-OutputNotConsumed (from depth-only calls) @@ -74,7 +74,7 @@ // "size": [ 960, 540 ] // "size": [ 640, 480 ] }, - "gpu": "auto", + "gpu": 1, "experimental": { "batch queue submissions": false, "dedicated thread": false @@ -120,7 +120,14 @@ ], "device": [ "VK_KHR_swapchain", - "VK_EXT_shader_viewport_index_layer" + "VK_EXT_shader_viewport_index_layer", + "VK_KHR_acceleration_structure", + "VK_KHR_ray_tracing_pipeline", + "VK_KHR_buffer_device_address", + "VK_KHR_deferred_host_operations", + "VK_EXT_descriptor_indexing", + "VK_KHR_spirv_1_4", + "VK_KHR_shader_float_controls" ] } }, @@ -198,7 +205,7 @@ "streams by default": true }, "memory pool": { - "enabled": false, + "enabled": true, "subPools": true, "alignment": 64, "override": false, @@ -209,14 +216,14 @@ "component": "128 MiB" } }, - "render modes": { "gui": true, "deferred": true }, + "render modes": { "gui": true, "deferred": true, "raytrace": true }, "limiters": { "deltaTime": 5, "framerate": "auto" }, "threads": { "workers" : "auto", - "frame limiter": "auto" + "frame limiter": 0 // "auto" }, "debug": { "framerate": { diff --git a/bin/data/shaders/raytrace/shader.gen.glsl b/bin/data/shaders/raytrace/shader.gen.glsl new file mode 100644 index 00000000..064ed4ea --- /dev/null +++ b/bin/data/shaders/raytrace/shader.gen.glsl @@ -0,0 +1,47 @@ +#version 460 +#extension GL_EXT_ray_tracing : enable +#pragma shader_stage(raygen) +layout (constant_id = 0) const uint PASSES = 2; + +#include "../common/macros.h" +#include "../common/structs.h" + +layout (binding = 0) uniform UBO { + EyeMatrices eyes[2]; +} ubo; +layout (binding = 1) uniform accelerationStructureEXT inTlas; +layout (binding = 2, rgba8) uniform volatile coherent image2D outImage; + +layout (location = 0) rayPayloadEXT vec4 hitValue; + +layout( push_constant ) uniform PushBlock { + uint pass; + uint draw; +} PushConstant; + +void main() { + { + surface.pass = PushConstant.pass; + + const vec2 inUv = (vec2(gl_LaunchIDEXT.xy) + vec2(0.5)) / vec2(gl_LaunchSizeEXT.xy); + + const mat4 iProjectionView = inverse( ubo.eyes[surface.pass].projection * mat4(mat3(ubo.eyes[surface.pass].view)) ); + const vec4 near4 = iProjectionView * (vec4(2.0 * inUv - 1.0, -1.0, 1.0)); + const vec4 far4 = iProjectionView * (vec4(2.0 * inUv - 1.0, 1.0, 1.0)); + const vec3 near3 = near4.xyz / near4.w; + const vec3 far3 = far4.xyz / far4.w; + + surface.ray.direction = normalize( far3 - near3 ); + surface.ray.origin = ubo.eyes[surface.pass].eyePos.xyz; + } + + uint rayFlags = gl_RayFlagsOpaqueEXT; + uint cullMask = 0xFF; + float tMin = 0.001; + float tMax = 10000.0; + + hitValue = vec4(1, 0, 1, 1); + traceRayEXT(inTlas, rayFlags, cullMask, 0, 0, 0, surface.ray.origin.xyz, tMin, surface.ray.direction.xyz, tMax, 0); + + imageStore(outImage, ivec2(gl_LaunchIDEXT.xy), vec4(hitValue)); +} \ No newline at end of file diff --git a/bin/data/shaders/raytrace/shader.hit.glsl b/bin/data/shaders/raytrace/shader.hit.glsl new file mode 100644 index 00000000..6e4879cb --- /dev/null +++ b/bin/data/shaders/raytrace/shader.hit.glsl @@ -0,0 +1,12 @@ +#version 460 +#extension GL_EXT_ray_tracing : enable +#extension GL_EXT_nonuniform_qualifier : enable +#pragma shader_stage(closest) + +layout(location = 0) rayPayloadInEXT vec4 hitValue; +hitAttributeEXT vec2 attribs; + +void main() { + const vec3 barycentricCoords = vec3(1.0f - attribs.x - attribs.y, attribs.x, attribs.y); + hitValue = vec4(barycentricCoords, 1.0); +} \ No newline at end of file diff --git a/bin/data/shaders/raytrace/shader.miss.glsl b/bin/data/shaders/raytrace/shader.miss.glsl new file mode 100644 index 00000000..8fb8e091 --- /dev/null +++ b/bin/data/shaders/raytrace/shader.miss.glsl @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : enable +#pragma shader_stage(miss) + +layout(location = 0) rayPayloadInEXT vec4 hitValue; + +void main() { + hitValue = vec4(0.2, 0.2, 0.2, 1.0); +} \ No newline at end of file diff --git a/bin/dreamcast/config.json b/bin/dreamcast/config.json index 4e1cad53..dcaa7e7e 100644 --- a/bin/dreamcast/config.json +++ b/bin/dreamcast/config.json @@ -1,7 +1,7 @@ { "engine": { "scenes": { - "start": "SS2", + "start": "StartMenu", "meshes": { "interleaved": false }, "matrix": { "reverseInfinite": false }, "lights": { "enabled": false, diff --git a/client/main.cpp b/client/main.cpp index 863c5a6a..0792816e 100644 --- a/client/main.cpp +++ b/client/main.cpp @@ -28,6 +28,7 @@ namespace { } void abrt( int sig ) { + ext::ready = false; UF_MSG_ERROR("Abort detected"); #if UF_ENV_DREAMCAST arch_stk_trace(1); @@ -35,6 +36,7 @@ namespace { } void segv( int sig ) { + ext::ready = false; UF_MSG_ERROR("Segfault detected"); #if UF_ENV_DREAMCAST arch_stk_trace(1); @@ -74,7 +76,7 @@ int main(int argc, char** argv){ try { #endif if ( uf::renderer::settings::experimental::dedicatedThread /*&& !uf::renderer::states::rebuild*/ ) { - auto& thread = uf::thread::get("Aux"); + auto& thread = uf::thread::fetchWorker(); uf::thread::queue(thread, [&]{ ext::render(); client::render(); diff --git a/debug.sh b/debug.sh index 211e3142..0ffbc23c 100644 --- a/debug.sh +++ b/debug.sh @@ -3,8 +3,5 @@ cd bin ARCH=$(cat ./exe/default/arch) CC=$(cat ./exe/default/cc) RENDERER=$(cat ./exe/default/renderer) -cp ./exe/lib/$ARCH/*.dll . -cp ./exe/lib/$ARCH/$CC/$RENDERER/*.dll . -gdb ./exe/program.$ARCH.$CC.$RENDERER.exe -rm *.dll - +export PATH="$(pwd)/exe/lib/${ARCH}/:$(pwd)/exe/lib/${ARCH}/${CC}/${RENDERER}/:${PATH}" +gdb ./exe/program.${ARCH}.${CC}.${RENDERER}.exe diff --git a/engine/inc/uf/ext/vulkan/buffer.h b/engine/inc/uf/ext/vulkan/buffer.h index d626e452..7099831e 100644 --- a/engine/inc/uf/ext/vulkan/buffer.h +++ b/engine/inc/uf/ext/vulkan/buffer.h @@ -8,6 +8,7 @@ namespace ext { struct Device; struct UF_API Buffer { + bool aliased = false; ext::vulkan::Device* device = NULL; VkBuffer buffer = VK_NULL_HANDLE; VkDeviceMemory memory = VK_NULL_HANDLE; @@ -17,6 +18,7 @@ namespace ext { 0 }; VkDeviceSize alignment = 0; + size_t address = {}; void* mapped = nullptr; VkBufferUsageFlags usage = 0; @@ -38,9 +40,12 @@ namespace ext { VkResult invalidate( VkDeviceSize size = VK_WHOLE_SIZE, VkDeviceSize offset = 0 ); void allocate( VkBufferCreateInfo ); + uint64_t getAddress(); + uint64_t getAddress() const; + // RAII ~Buffer(); - void initialize( ext::vulkan::Device& device ); + void initialize( ext::vulkan::Device& device, size_t = {} ); void initialize( const void*, VkDeviceSize, VkBufferUsageFlags, VkMemoryPropertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, bool = VK_DEFAULT_STAGE_BUFFERS ); bool update( const void*, VkDeviceSize, bool = VK_DEFAULT_STAGE_BUFFERS ) const; void destroy(); @@ -48,6 +53,7 @@ namespace ext { void aliasBuffer( const Buffer& ); }; struct UF_API Buffers { + size_t requestedAlignment{}; uf::stl::vector buffers; Device* device = NULL; @@ -60,19 +66,12 @@ namespace ext { size_t initializeBuffer( const void*, VkDeviceSize, VkBufferUsageFlags, VkMemoryPropertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, bool = VK_DEFAULT_STAGE_BUFFERS ); bool updateBuffer( const void*, VkDeviceSize, const Buffer&, bool = VK_DEFAULT_STAGE_BUFFERS ) const; - inline size_t initializeBuffer( void* data, VkDeviceSize length, VkBufferUsageFlags usage, VkMemoryPropertyFlags memoryProperties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, bool stage = VK_DEFAULT_STAGE_BUFFERS ) { return initializeBuffer( (const void*) data, length, usage, memoryProperties, stage ); } - template inline size_t initializeBuffer( const T& data, VkDeviceSize length, VkBufferUsageFlags usage, VkMemoryPropertyFlags memoryProperties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, bool stage = VK_DEFAULT_STAGE_BUFFERS ) { return initializeBuffer( (const void*) &data, length, usage, memoryProperties, stage ); } - template inline size_t initializeBuffer( const T& data, VkBufferUsageFlags usage, VkMemoryPropertyFlags memoryProperties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, bool stage = VK_DEFAULT_STAGE_BUFFERS ) { return initializeBuffer( (const void*) &data, static_cast(sizeof(T)), usage, memoryProperties, stage ); } + // template inline size_t initializeBuffer( const T* data, VkBufferUsageFlags usage, VkMemoryPropertyFlags memoryProperties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, bool stage = VK_DEFAULT_STAGE_BUFFERS ) { return initializeBuffer( (const void*) data, static_cast(sizeof(T)), usage, memoryProperties, stage ); } + // template inline size_t initializeBuffer( const T& data, VkBufferUsageFlags usage, VkMemoryPropertyFlags memoryProperties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, bool stage = VK_DEFAULT_STAGE_BUFFERS ) { return initializeBuffer( (const void*) &data, static_cast(sizeof(T)), usage, memoryProperties, stage ); } inline bool updateBuffer( const void* data, VkDeviceSize length, size_t index = 0, bool stage = VK_DEFAULT_STAGE_BUFFERS ) const { return updateBuffer( data, length, buffers.at(index), stage ); } - inline bool updateBuffer( void* data, VkDeviceSize length, size_t index = 0, bool stage = VK_DEFAULT_STAGE_BUFFERS ) const { return updateBuffer( (const void*) data, length, index, stage ); } - inline bool updateBuffer( void* data, VkDeviceSize length, const Buffer& buffer, bool stage = VK_DEFAULT_STAGE_BUFFERS ) const { return updateBuffer( (const void*) data, length, buffer, stage ); } - - template inline bool updateBuffer( const T& data, size_t index = 0, bool stage = VK_DEFAULT_STAGE_BUFFERS ) const { return updateBuffer( (const void*) &data, static_cast(sizeof(T)), index, stage ); } - template inline bool updateBuffer( const T& data, VkDeviceSize length, size_t index = 0, bool stage = VK_DEFAULT_STAGE_BUFFERS ) const { return updateBuffer( (const void*) &data, length, index, stage ); } - + // template inline bool updateBuffer( const T* data, const Buffer& buffer, bool stage = VK_DEFAULT_STAGE_BUFFERS ) const { return updateBuffer( (const void*) data, static_cast(sizeof(T)), buffer, stage ); } template inline bool updateBuffer( const T& data, const Buffer& buffer, bool stage = VK_DEFAULT_STAGE_BUFFERS ) const { return updateBuffer( (const void*) &data, static_cast(sizeof(T)), buffer, stage ); } - template inline bool updateBuffer( const T& data, VkDeviceSize length, const Buffer& buffer, bool stage = VK_DEFAULT_STAGE_BUFFERS ) const { return updateBuffer( (const void*) &data, length, buffer, stage ); } }; } } \ No newline at end of file diff --git a/engine/inc/uf/ext/vulkan/device.h b/engine/inc/uf/ext/vulkan/device.h index 99ec4447..c402fe5d 100644 --- a/engine/inc/uf/ext/vulkan/device.h +++ b/engine/inc/uf/ext/vulkan/device.h @@ -15,9 +15,6 @@ namespace ext { VkPhysicalDevice physicalDevice; VkDevice logicalDevice; struct { - // uf::stl::unordered_map graphics; - // uf::stl::unordered_map compute; - // uf::stl::unordered_map transfer; uf::ThreadUnique graphics; uf::ThreadUnique compute; uf::ThreadUnique transfer; @@ -47,10 +44,6 @@ namespace ext { uf::stl::vector queueFamilyProperties; struct { - // uf::stl::unordered_map graphics; - // uf::stl::unordered_map present; - // uf::stl::unordered_map compute; - // uf::stl::unordered_map transfer; uf::ThreadUnique graphics; uf::ThreadUnique present; uf::ThreadUnique compute; @@ -59,14 +52,12 @@ namespace ext { uf::Window* window; - /* - struct { - VkFormat depth; - VkFormat color; - VkColorSpaceKHR space; - } formats; - */ - + enum QueueEnum { + GRAPHICS, + PRESENT, + COMPUTE, + TRANSFER, + }; struct QueueFamilyIndices { uint32_t graphics; uint32_t present; @@ -78,10 +69,12 @@ namespace ext { // helpers uint32_t getQueueFamilyIndex( VkQueueFlagBits queueFlags ); uint32_t getMemoryType( uint32_t typeBits, VkMemoryPropertyFlags properties, VkBool32 *memTypeFound = nullptr ); - int rate( VkPhysicalDevice device ); - VkCommandBuffer createCommandBuffer( VkCommandBufferLevel level, bool begin = false ); + VkCommandBuffer createCommandBuffer( VkCommandBufferLevel level, bool begin = true ); + VkCommandBuffer createCommandBuffer( VkCommandBufferLevel level, QueueEnum queue, bool begin = true ); + void flushCommandBuffer( VkCommandBuffer commandBuffer, bool free = true ); + void flushCommandBuffer( VkCommandBuffer commandBuffer, QueueEnum queue, bool free = true ); VkResult createBuffer( VkBufferUsageFlags usage, @@ -99,12 +92,6 @@ namespace ext { const void* data = nullptr ); - enum QueueEnum { - GRAPHICS, - PRESENT, - COMPUTE, - TRANSFER, - }; VkQueue& getQueue( QueueEnum ); VkCommandPool& getCommandPool( QueueEnum ); VkQueue& getQueue( QueueEnum, std::thread::id ); diff --git a/engine/inc/uf/ext/vulkan/enums.h b/engine/inc/uf/ext/vulkan/enums.h index ccdd143b..e3c17a6f 100644 --- a/engine/inc/uf/ext/vulkan/enums.h +++ b/engine/inc/uf/ext/vulkan/enums.h @@ -67,6 +67,9 @@ namespace ext { static const type_t GEOMETRY = VK_SHADER_STAGE_GEOMETRY_BIT; static const type_t FRAGMENT = VK_SHADER_STAGE_FRAGMENT_BIT; static const type_t COMPUTE = VK_SHADER_STAGE_COMPUTE_BIT; + static const type_t RAY_GEN = VK_SHADER_STAGE_RAYGEN_BIT_KHR; + static const type_t RAY_MISS = VK_SHADER_STAGE_MISS_BIT_KHR; + static const type_t RAY_HIT = VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR; static const type_t ALL_GRAPHICS = VK_SHADER_STAGE_ALL_GRAPHICS; static const type_t ALL = VK_SHADER_STAGE_ALL; } @@ -129,6 +132,9 @@ namespace ext { static const type_t INDEX = VK_BUFFER_USAGE_INDEX_BUFFER_BIT; static const type_t VERTEX = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; static const type_t INDIRECT = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; + static const type_t ACCELERATION_STRUCTURE = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR; + static const type_t ADDRESS = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + static const type_t BINDING_TABLE = VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR; static const type_t STREAM = {}; static const type_t STATIC = {}; diff --git a/engine/inc/uf/ext/vulkan/graphic.h b/engine/inc/uf/ext/vulkan/graphic.h index 7050da71..e3a16114 100644 --- a/engine/inc/uf/ext/vulkan/graphic.h +++ b/engine/inc/uf/ext/vulkan/graphic.h @@ -23,6 +23,8 @@ namespace ext { VkDescriptorSet descriptorSet = VK_NULL_HANDLE; GraphicDescriptor descriptor = {}; + uf::stl::vector sbtEntries; + struct { uf::Serializer json; @@ -76,12 +78,20 @@ namespace ext { bool initialized = false; bool process = true; Material material = {}; - uf::stl::unordered_map pipelines; + uf::stl::unordered_map pipelines; struct { uf::stl::unordered_map buffers; } metadata; + struct { + struct { + VkAccelerationStructureKHR handle; + size_t deviceAddress; + Buffer buffer; + } top, bottom; + } accelerationStructures; + ~Graphic(); void initialize( const uf::stl::string& = "" ); void destroy(); diff --git a/engine/inc/uf/ext/vulkan/rendermode.h b/engine/inc/uf/ext/vulkan/rendermode.h index 6b246a24..e8913aef 100644 --- a/engine/inc/uf/ext/vulkan/rendermode.h +++ b/engine/inc/uf/ext/vulkan/rendermode.h @@ -65,6 +65,7 @@ namespace ext { bool tryMutex( std::thread::id = std::this_thread::get_id() ); void unlockMutex( std::thread::id = std::this_thread::get_id() ); std::lock_guard guardMutex( std::thread::id = std::this_thread::get_id() ); + void cleanupCommands( std::thread::id = std::this_thread::get_id() ); virtual ~RenderMode(); // RAII diff --git a/engine/inc/uf/ext/vulkan/rendermodes/raytrace.h b/engine/inc/uf/ext/vulkan/rendermodes/raytrace.h new file mode 100644 index 00000000..e2e8c495 --- /dev/null +++ b/engine/inc/uf/ext/vulkan/rendermodes/raytrace.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +namespace ext { + namespace vulkan { + struct UF_API RayTraceRenderMode : public ext::vulkan::RenderMode { + ext::vulkan::Graphic blitter; + + // + const uf::stl::string getTarget() const; + void setTarget( const uf::stl::string& ); + + // + virtual const uf::stl::string getType() const; + virtual const size_t blitters() const; + virtual ext::vulkan::Graphic* getBlitter(size_t = 0); + virtual uf::stl::vector getBlitters(); + + virtual GraphicDescriptor bindGraphicDescriptor( const GraphicDescriptor&, size_t = 0 ); + + virtual void createCommandBuffers( const uf::stl::vector& graphics ); + virtual void initialize( Device& device ); + virtual void tick(); + virtual void destroy(); + virtual void render(); + virtual void pipelineBarrier( VkCommandBuffer, uint8_t = -1 ); + }; + } +} \ No newline at end of file diff --git a/engine/inc/uf/ext/vulkan/shader.h b/engine/inc/uf/ext/vulkan/shader.h index 6d9f977b..5c5dca4a 100644 --- a/engine/inc/uf/ext/vulkan/shader.h +++ b/engine/inc/uf/ext/vulkan/shader.h @@ -68,6 +68,13 @@ namespace ext { uint32_t size = 0; // int32_t buffer = -1; }; + struct AccelerationStructure { + uf::stl::string name = ""; + uint32_t index = 0; + uint32_t binding = 0; + uint32_t size = 0; + // int32_t buffer = -1; + }; struct PushConstant { uf::stl::string name = ""; uint32_t index = 0; @@ -90,6 +97,7 @@ namespace ext { uf::stl::unordered_map outputs; uf::stl::unordered_map uniforms; uf::stl::unordered_map storage; + uf::stl::unordered_map accelerationStructure; uf::stl::unordered_map pushConstants; uf::stl::unordered_map specializationConstants; } definitions; diff --git a/engine/inc/uf/ext/vulkan/vk.h b/engine/inc/uf/ext/vulkan/vk.h index e7d898cf..12a19dc7 100644 --- a/engine/inc/uf/ext/vulkan/vk.h +++ b/engine/inc/uf/ext/vulkan/vk.h @@ -14,5 +14,5 @@ #define VK_CHECK_RESULT(f) { VkResult res = (f); if ( res != VK_SUCCESS ) UF_EXCEPTION(ext::vulkan::errorString( res )); } #define VK_FLAGS_NONE 0 -#define VK_DEFAULT_FENCE_TIMEOUT 100000000000 +#define VK_DEFAULT_FENCE_TIMEOUT 5000000000 #define VK_DEFAULT_STAGE_BUFFERS 1 \ No newline at end of file diff --git a/engine/inc/uf/ext/vulkan/vulkan.h b/engine/inc/uf/ext/vulkan/vulkan.h index 4d56bc00..8a248552 100644 --- a/engine/inc/uf/ext/vulkan/vulkan.h +++ b/engine/inc/uf/ext/vulkan/vulkan.h @@ -10,6 +10,19 @@ namespace ext { namespace vulkan { + #if 1 + extern UF_API PFN_vkGetBufferDeviceAddressKHR vkGetBufferDeviceAddressKHR; + extern UF_API PFN_vkCreateAccelerationStructureKHR vkCreateAccelerationStructureKHR; + extern UF_API PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructureKHR; + extern UF_API PFN_vkGetAccelerationStructureBuildSizesKHR vkGetAccelerationStructureBuildSizesKHR; + extern UF_API PFN_vkGetAccelerationStructureDeviceAddressKHR vkGetAccelerationStructureDeviceAddressKHR; + extern UF_API PFN_vkCmdBuildAccelerationStructuresKHR vkCmdBuildAccelerationStructuresKHR; + extern UF_API PFN_vkBuildAccelerationStructuresKHR vkBuildAccelerationStructuresKHR; + extern UF_API PFN_vkCmdTraceRaysKHR vkCmdTraceRaysKHR; + extern UF_API PFN_vkGetRayTracingShaderGroupHandlesKHR vkGetRayTracingShaderGroupHandlesKHR; + extern UF_API PFN_vkCreateRayTracingPipelinesKHR vkCreateRayTracingPipelinesKHR; + #endif + VkResult CreateDebugUtilsMessengerEXT( VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, @@ -55,6 +68,7 @@ namespace ext { extern UF_API bool dedicatedThread; extern UF_API bool rebuildOnTickBegin; extern UF_API bool batchQueueSubmissions; + extern UF_API bool enableMultiGPU; } namespace invariant { diff --git a/engine/inc/uf/macros.h b/engine/inc/uf/macros.h index 59320ab9..75f0a20c 100644 --- a/engine/inc/uf/macros.h +++ b/engine/inc/uf/macros.h @@ -125,6 +125,10 @@ #define LENGTH_OF(X) *(&X + 1) - X #define FOR_ARRAY(X) for ( auto i = 0; i < LENGTH_OF(X); ++i ) +#define ALIGNED_SIZE(V, A) ((V + A - 1) & ~(A - 1)) + +#define UF_MSG_PEEK(X) #X << ": " << X + #if UF_ENV_DREAMCAST #define DC_STATS() {\ UF_MSG_DEBUG(spec::dreamcast::malloc_stats());\ diff --git a/engine/inc/uf/spec/renderer/vulkan.h b/engine/inc/uf/spec/renderer/vulkan.h index f2863ea0..43992006 100644 --- a/engine/inc/uf/spec/renderer/vulkan.h +++ b/engine/inc/uf/spec/renderer/vulkan.h @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace spec { diff --git a/engine/inc/uf/utils/math/hash.h b/engine/inc/uf/utils/math/hash.h new file mode 100644 index 00000000..efed04ed --- /dev/null +++ b/engine/inc/uf/utils/math/hash.h @@ -0,0 +1,11 @@ +#pragma once + +namespace uf { + inline void hash(std::size_t& seed) { } + + template + inline void hash(std::size_t& seed, const T& v, Rest... rest) { + seed ^= std::hash()(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); + hash(seed, rest...); + } +} \ No newline at end of file diff --git a/engine/inc/uf/utils/math/math.h b/engine/inc/uf/utils/math/math.h index 48652a58..edf22f24 100644 --- a/engine/inc/uf/utils/math/math.h +++ b/engine/inc/uf/utils/math/math.h @@ -2,6 +2,7 @@ #include #include +#include "hash.h" #if UF_ENV_DREAMCAST #include "sh4.h" diff --git a/engine/inc/uf/utils/mesh/mesh.h b/engine/inc/uf/utils/mesh/mesh.h index f023ab68..222d49bf 100644 --- a/engine/inc/uf/utils/mesh/mesh.h +++ b/engine/inc/uf/utils/mesh/mesh.h @@ -257,12 +257,8 @@ namespace uf { namespace ext { namespace RENDERER { struct UF_API GraphicDescriptor { - #if UF_GRAPHIC_DESCRIPTOR_USE_STRING - typedef uf::stl::string hash_t; - #else typedef size_t hash_t; - #endif - + uf::stl::string renderMode = ""; uf::stl::string pipeline = ""; @@ -273,6 +269,8 @@ namespace ext { uf::Mesh::Input vertex, index, instance, indirect; size_t bufferOffset = 0; pod::Vector3ui dispatch = { 0, 0, 0 }; + size_t width = 0; + size_t height = 0; } inputs; ext::RENDERER::enums::PrimitiveTopology::type_t topology = ext::RENDERER::enums::PrimitiveTopology::TRIANGLE_LIST; @@ -303,6 +301,13 @@ namespace ext { } } +namespace std { + template <> + struct hash { + size_t operator()(const ext::RENDERER::GraphicDescriptor& descriptor) const { return descriptor.hash(); } + }; +} + #undef UF_RENDERER #define UF_VERTEX_DESCRIPTION( TYPE, FORMAT, ATTRIBUTE ) {\ .offset = offsetof(TYPE, ATTRIBUTE),\ diff --git a/engine/inc/uf/utils/thread/perthread.h b/engine/inc/uf/utils/thread/perthread.h index db785141..9e408d94 100644 --- a/engine/inc/uf/utils/thread/perthread.h +++ b/engine/inc/uf/utils/thread/perthread.h @@ -26,6 +26,7 @@ namespace uf { bool tryMutex( id_t id = std::this_thread::get_id() ); void unlockMutex( id_t id = std::this_thread::get_id() ); std::lock_guard guardMutex( id_t id = std::this_thread::get_id() ); + void cleanup( id_t id = std::this_thread::get_id() ); }; } diff --git a/engine/inc/uf/utils/thread/perthread.inl b/engine/inc/uf/utils/thread/perthread.inl index a419bd53..15bc6f2a 100644 --- a/engine/inc/uf/utils/thread/perthread.inl +++ b/engine/inc/uf/utils/thread/perthread.inl @@ -36,6 +36,20 @@ std::lock_guard uf::ThreadUnique::guardMutex( id_t id ) { return std::lock_guard(*m_mutex_container[id]); } template +void uf::ThreadUnique::cleanup( id_t id ) { + for ( auto it = m_container.begin(); it != m_container.end(); ) { + if ( it->first == id ) ++it; + else it = m_container.erase(it); + } + for ( auto it = m_mutex_container.begin(); it != m_mutex_container.end(); ) { + if ( it->first == id ) ++it; + else { + delete it->second; + it = m_mutex_container.erase(it); + } + } +} +template typename uf::ThreadUnique::container_t& uf::ThreadUnique::container() { return m_container; } \ No newline at end of file diff --git a/engine/inc/uf/utils/thread/thread.h b/engine/inc/uf/utils/thread/thread.h index 632ffd64..52b2ab76 100644 --- a/engine/inc/uf/utils/thread/thread.h +++ b/engine/inc/uf/utils/thread/thread.h @@ -16,6 +16,13 @@ #include #include +namespace uf { + namespace thread { + extern UF_API uf::stl::string workerThreadName; + extern UF_API uf::stl::string mainThreadName; + } +} + namespace pod { struct UF_API Thread { typedef std::function function_t; @@ -28,7 +35,10 @@ namespace pod { bool running, terminates; std::mutex* mutex; - std::condition_variable condition; + struct { + std::condition_variable queued; + std::condition_variable finished; + } conditions; std::thread thread; pod::Thread::queue_t queue; @@ -38,7 +48,7 @@ namespace pod { uint affinity = 0; struct UF_API Tasks { - uf::stl::string name = "Aux"; + uf::stl::string name = uf::thread::workerThreadName; bool waits = true; pod::Thread::queue_t container; @@ -60,8 +70,9 @@ namespace uf { extern UF_API bool async; /* Easy to use async helper functions */ - pod::Thread& UF_API fetchWorker( const uf::stl::string& name = "Aux" ); - pod::Thread::Tasks UF_API schedule( const uf::stl::string& name = "Aux", bool waits = true ); + pod::Thread& UF_API fetchWorker( const uf::stl::string& name = uf::thread::workerThreadName ); + pod::Thread::Tasks UF_API schedule( bool multithread, bool waits = true ); + pod::Thread::Tasks UF_API schedule( const uf::stl::string& name = uf::thread::workerThreadName, bool waits = true ); void UF_API execute( pod::Thread::Tasks& tasks ); /* Acts on global threads */ diff --git a/engine/src/engine/asset/asset.cpp b/engine/src/engine/asset/asset.cpp index 8c041483..0f502f11 100644 --- a/engine/src/engine/asset/asset.cpp +++ b/engine/src/engine/asset/asset.cpp @@ -50,7 +50,7 @@ uf::Asset uf::Asset::masterAssetLoader; bool uf::Asset::assertionLoad = true; void uf::Asset::processQueue() { - uf::thread::queue([&]{ +// uf::thread::queue([&]{ mutex.lock(); auto jobs = std::move(this->getComponent>()); while ( !jobs.empty() ) { @@ -68,7 +68,7 @@ void uf::Asset::processQueue() { if ( callback != "" && filename != "" ) uf::hooks.call(callback, payload); } mutex.unlock(); - }); +// }); } void uf::Asset::cache( const uf::stl::string& callback, const uf::Asset::Payload& payload ) { mutex.lock(); diff --git a/engine/src/engine/graph/decode.cpp b/engine/src/engine/graph/decode.cpp index 4f72ac26..2cb00c2a 100644 --- a/engine/src/engine/graph/decode.cpp +++ b/engine/src/engine/graph/decode.cpp @@ -307,9 +307,9 @@ pod::Graph uf::graph::load( const uf::stl::string& filename, const uf::Serialize graph.metadata = metadata; // serializer["metadata"]; #if UF_GRAPH_LOAD_MULTITHREAD - auto tasks = uf::thread::schedule("Async"); + auto tasks = uf::thread::schedule(true); #else - auto tasks = uf::thread::schedule("Main"); + auto tasks = uf::thread::schedule(false); #endif tasks.queue([&]{ diff --git a/engine/src/engine/graph/encode.cpp b/engine/src/engine/graph/encode.cpp index 5228450d..06b50096 100644 --- a/engine/src/engine/graph/encode.cpp +++ b/engine/src/engine/graph/encode.cpp @@ -262,9 +262,9 @@ uf::stl::string uf::graph::save( const pod::Graph& graph, const uf::stl::string& #endif #if UF_GRAPH_LOAD_MULTITHREAD - auto tasks = uf::thread::schedule("Async"); + auto tasks = uf::thread::schedule(true); #else - auto tasks = uf::thread::schedule("Main"); + auto tasks = uf::thread::schedule(false); #endif tasks.queue([&]{ ext::json::reserve( serializer["instances"], graph.instances.size() ); diff --git a/engine/src/engine/graph/graph.cpp b/engine/src/engine/graph/graph.cpp index c2b5e257..b9f54af1 100644 --- a/engine/src/engine/graph/graph.cpp +++ b/engine/src/engine/graph/graph.cpp @@ -1149,7 +1149,7 @@ void uf::graph::tick() { ::newGraphAdded = false; } } -void uf::graph::render() { +void uf::graph::render() { auto& scene = uf::scene::getCurrentScene(); auto& controller = scene.getController(); auto& camera = controller.getComponent(); @@ -1163,10 +1163,10 @@ void uf::graph::render() { uf::graph::storage.buffers.camera.update( (const void*) &camera.data().viewport, sizeof(pod::Camera::Viewports) ); } #endif +#if UF_USE_VULKAN auto* renderMode = uf::renderer::getCurrentRenderMode(); if ( !renderMode ) return; -#if UF_USE_VULKAN for ( auto& buffer : renderMode->buffers ) { if ( !(buffer.usage & uf::renderer::enums::Buffer::UNIFORM) ) continue; if ( buffer.allocationInfo.size != sizeof(pod::Camera::Viewports) ) continue; diff --git a/engine/src/engine/scene/scene.cpp b/engine/src/engine/scene/scene.cpp index 4a2259fb..b66d988e 100644 --- a/engine/src/engine/scene/scene.cpp +++ b/engine/src/engine/scene/scene.cpp @@ -143,14 +143,14 @@ void uf::scene::tick() { auto graph = scene.getGraph(true); #if 1 for ( auto entity : graph ) entity->tick(); - auto& tasks = metadata.tasks; #else + auto& tasks = metadata.tasks; pod::Thread::Tasks tasks = metadata.tasks; tasks.queue([&]{ for ( auto entity : graph ) entity->tick(); }); -#endif uf::thread::execute( tasks ); +#endif } void uf::scene::render() { if ( scenes.empty() ) return; diff --git a/engine/src/ext/opengl/opengl.cpp b/engine/src/ext/opengl/opengl.cpp index 6ac2c93c..37907ddf 100644 --- a/engine/src/ext/opengl/opengl.cpp +++ b/engine/src/ext/opengl/opengl.cpp @@ -205,7 +205,7 @@ void UF_API ext::opengl::initialize() { uf::graph::initialize(); - auto tasks = uf::thread::schedule(settings::invariant::multithreadedRecording ? "Aux" : "Main"); + auto tasks = uf::thread::schedule(settings::invariant::multithreadedRecording); for ( auto& renderMode : renderModes ) { if ( !renderMode ) continue; tasks.queue([&]{ if ( settings::invariant::individualPipelines ) renderMode->bindPipelines(); @@ -377,7 +377,7 @@ void UF_API ext::opengl::tick(){ renderMode->tick(); } - auto tasks = uf::thread::schedule(settings::invariant::multithreadedRecording ? "Aux" : "Main"); + auto tasks = uf::thread::schedule(settings::invariant::multithreadedRecording); for ( auto& renderMode : renderModes ) { if ( !renderMode ) continue; if ( ext::opengl::states::rebuild || renderMode->rebuild ) tasks.queue([&]{ if ( settings::invariant::individualPipelines ) renderMode->bindPipelines(); diff --git a/engine/src/ext/vulkan/buffer.cpp b/engine/src/ext/vulkan/buffer.cpp index ac6a1952..81a3f696 100644 --- a/engine/src/ext/vulkan/buffer.cpp +++ b/engine/src/ext/vulkan/buffer.cpp @@ -22,11 +22,13 @@ void ext::vulkan::Buffer::aliasBuffer( const ext::vulkan::Buffer& buffer ) { .allocationInfo = buffer.allocationInfo, }; */ - this->device = NULL; + this->aliased = true; + this->device = buffer.device; this->buffer = buffer.buffer; this->memory = buffer.memory; this->descriptor = buffer.descriptor; this->alignment = buffer.alignment; + this->address = buffer.address; this->mapped = buffer.mapped; this->usage = buffer.usage; this->memoryProperties = buffer.memoryProperties; @@ -84,18 +86,32 @@ void ext::vulkan::Buffer::allocate( VkBufferCreateInfo bufferCreateInfo ) { allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; } - vmaCreateBuffer( allocator, &bufferCreateInfo, &allocCreateInfo, &buffer, &allocation, &allocationInfo ); + vmaCreateBufferWithAlignment( allocator, &bufferCreateInfo, &allocCreateInfo, alignment, &buffer, &allocation, &allocationInfo ); +} + +size_t ext::vulkan::Buffer::getAddress() { + VkBufferDeviceAddressInfoKHR info{}; + info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO; + info.buffer = buffer; + return (this->address = vkGetBufferDeviceAddressKHR(this->device ? *this->device : ext::vulkan::device, &info)); +} +size_t ext::vulkan::Buffer::getAddress() const { + VkBufferDeviceAddressInfoKHR info{}; + info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO; + info.buffer = buffer; + return vkGetBufferDeviceAddressKHR(this->device ? *this->device : ext::vulkan::device, &info); } // RAII ext::vulkan::Buffer::~Buffer() { // this->destroy(); } -void ext::vulkan::Buffer::initialize( ext::vulkan::Device& device ) { +void ext::vulkan::Buffer::initialize( ext::vulkan::Device& device, size_t alignment ) { this->device = &device; + this->alignment = alignment; } void ext::vulkan::Buffer::destroy() { - if ( !device ) return; + if ( !device || aliased ) return; if ( buffer ) { vmaDestroyBuffer( allocator, buffer, allocation ); @@ -145,12 +161,12 @@ bool ext::vulkan::Buffer::update( const void* data, VkDeviceSize length, bool st ); // Copy to staging buffer - VkCommandBuffer copyCommand = device->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + VkCommandBuffer copyCommand = device->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, Device::QueueEnum::TRANSFER); VkBufferCopy region = {}; region.size = length; vkCmdCopyBuffer(copyCommand, staging.buffer, buffer, 1, ®ion); - device->flushCommandBuffer(copyCommand, true); + device->flushCommandBuffer(copyCommand, Device::QueueEnum::TRANSFER); staging.destroy(); return false; } @@ -174,7 +190,7 @@ void ext::vulkan::Buffers::destroy() { size_t ext::vulkan::Buffers::initializeBuffer( const void* data, VkDeviceSize length, VkBufferUsageFlags usage, VkMemoryPropertyFlags memoryProperties, bool stage ) { size_t index = buffers.size(); auto& buffer = buffers.emplace_back(); - buffer.initialize( *device ); + buffer.initialize( *device, requestedAlignment ); buffer.initialize( data, length, usage, memoryProperties, stage ); return index; } diff --git a/engine/src/ext/vulkan/device.cpp b/engine/src/ext/vulkan/device.cpp index a6c2dd54..7cc348b7 100644 --- a/engine/src/ext/vulkan/device.cpp +++ b/engine/src/ext/vulkan/device.cpp @@ -15,6 +15,171 @@ #define UF_MSG_VALIDATION(X) if ( ext::vulkan::settings::validation ) UF_MSG(X, " VULKAN "); namespace { + struct DeviceInfo { + VkPhysicalDevice handle = VK_NULL_HANDLE; + VkPhysicalDeviceProperties properties; + VkPhysicalDeviceFeatures features; + size_t score; + }; + + ::DeviceInfo rate( ext::vulkan::Device& device, VkPhysicalDevice handle ) { + ::DeviceInfo deviceInfo{ .handle = handle }; + + auto& physicalDevice = deviceInfo.handle; + auto& deviceProperties = deviceInfo.properties; + auto& deviceFeatures = deviceInfo.features; + auto& score = deviceInfo.score; + + vkGetPhysicalDeviceProperties(physicalDevice, &deviceProperties); + vkGetPhysicalDeviceFeatures(physicalDevice, &deviceFeatures); + + // Discrete GPUs have a significant performance advantage + if (deviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) score += 1000; + { + score += deviceProperties.limits.maxImageDimension1D; + score += deviceProperties.limits.maxImageDimension2D; + score += deviceProperties.limits.maxImageDimension3D; + score += deviceProperties.limits.maxImageDimensionCube; + score += deviceProperties.limits.maxImageArrayLayers; + score += deviceProperties.limits.maxTexelBufferElements; + score += deviceProperties.limits.maxUniformBufferRange; + score += deviceProperties.limits.maxStorageBufferRange; + score += deviceProperties.limits.maxPushConstantsSize; + score += deviceProperties.limits.maxMemoryAllocationCount; + score += deviceProperties.limits.maxSamplerAllocationCount; + score += deviceProperties.limits.bufferImageGranularity; + score += deviceProperties.limits.sparseAddressSpaceSize; + score += deviceProperties.limits.maxBoundDescriptorSets; + score += deviceProperties.limits.maxPerStageDescriptorSamplers; + score += deviceProperties.limits.maxPerStageDescriptorUniformBuffers; + score += deviceProperties.limits.maxPerStageDescriptorStorageBuffers; + score += deviceProperties.limits.maxPerStageDescriptorSampledImages; + score += deviceProperties.limits.maxPerStageDescriptorStorageImages; + score += deviceProperties.limits.maxPerStageDescriptorInputAttachments; + score += deviceProperties.limits.maxPerStageResources; + score += deviceProperties.limits.maxDescriptorSetSamplers; + score += deviceProperties.limits.maxDescriptorSetUniformBuffers; + score += deviceProperties.limits.maxDescriptorSetUniformBuffersDynamic; + score += deviceProperties.limits.maxDescriptorSetStorageBuffers; + score += deviceProperties.limits.maxDescriptorSetStorageBuffersDynamic; + score += deviceProperties.limits.maxDescriptorSetSampledImages; + score += deviceProperties.limits.maxDescriptorSetStorageImages; + score += deviceProperties.limits.maxDescriptorSetInputAttachments; + score += deviceProperties.limits.maxVertexInputAttributes; + score += deviceProperties.limits.maxVertexInputBindings; + score += deviceProperties.limits.maxVertexInputAttributeOffset; + score += deviceProperties.limits.maxVertexInputBindingStride; + score += deviceProperties.limits.maxVertexOutputComponents; + score += deviceProperties.limits.maxTessellationGenerationLevel; + score += deviceProperties.limits.maxTessellationPatchSize; + score += deviceProperties.limits.maxTessellationControlPerVertexInputComponents; + score += deviceProperties.limits.maxTessellationControlPerVertexOutputComponents; + score += deviceProperties.limits.maxTessellationControlPerPatchOutputComponents; + score += deviceProperties.limits.maxTessellationControlTotalOutputComponents; + score += deviceProperties.limits.maxTessellationEvaluationInputComponents; + score += deviceProperties.limits.maxTessellationEvaluationOutputComponents; + score += deviceProperties.limits.maxGeometryShaderInvocations; + score += deviceProperties.limits.maxGeometryInputComponents; + score += deviceProperties.limits.maxGeometryOutputComponents; + score += deviceProperties.limits.maxGeometryOutputVertices; + score += deviceProperties.limits.maxGeometryTotalOutputComponents; + score += deviceProperties.limits.maxFragmentInputComponents; + score += deviceProperties.limits.maxFragmentOutputAttachments; + score += deviceProperties.limits.maxFragmentDualSrcAttachments; + score += deviceProperties.limits.maxFragmentCombinedOutputResources; + score += deviceProperties.limits.maxComputeSharedMemorySize; + score += deviceProperties.limits.maxComputeWorkGroupInvocations; + score += deviceProperties.limits.subPixelPrecisionBits; + score += deviceProperties.limits.subTexelPrecisionBits; + score += deviceProperties.limits.mipmapPrecisionBits; + score += deviceProperties.limits.maxDrawIndexedIndexValue; + score += deviceProperties.limits.maxDrawIndirectCount; + score += deviceProperties.limits.maxSamplerLodBias; + score += deviceProperties.limits.maxSamplerAnisotropy; + score += deviceProperties.limits.maxViewports; + score += deviceProperties.limits.viewportSubPixelBits; + score += deviceProperties.limits.minMemoryMapAlignment; + score += deviceProperties.limits.minTexelBufferOffsetAlignment; + score += deviceProperties.limits.minUniformBufferOffsetAlignment; + score += deviceProperties.limits.minStorageBufferOffsetAlignment; + score += deviceProperties.limits.minTexelOffset; + score += deviceProperties.limits.maxTexelOffset; + score += deviceProperties.limits.minTexelGatherOffset; + score += deviceProperties.limits.maxTexelGatherOffset; + score += deviceProperties.limits.minInterpolationOffset; + score += deviceProperties.limits.maxInterpolationOffset; + score += deviceProperties.limits.subPixelInterpolationOffsetBits; + score += deviceProperties.limits.maxFramebufferWidth; + score += deviceProperties.limits.maxFramebufferHeight; + score += deviceProperties.limits.maxFramebufferLayers; + score += deviceProperties.limits.framebufferColorSampleCounts; + score += deviceProperties.limits.framebufferDepthSampleCounts; + score += deviceProperties.limits.framebufferStencilSampleCounts; + score += deviceProperties.limits.framebufferNoAttachmentsSampleCounts; + score += deviceProperties.limits.maxColorAttachments; + score += deviceProperties.limits.sampledImageColorSampleCounts; + score += deviceProperties.limits.sampledImageIntegerSampleCounts; + score += deviceProperties.limits.sampledImageDepthSampleCounts; + score += deviceProperties.limits.sampledImageStencilSampleCounts; + score += deviceProperties.limits.storageImageSampleCounts; + score += deviceProperties.limits.maxSampleMaskWords; + score += deviceProperties.limits.timestampComputeAndGraphics; + score += deviceProperties.limits.timestampPeriod; + score += deviceProperties.limits.maxClipDistances; + score += deviceProperties.limits.maxCullDistances; + score += deviceProperties.limits.maxCombinedClipAndCullDistances; + score += deviceProperties.limits.discreteQueuePriorities; + score += deviceProperties.limits.pointSizeGranularity; + score += deviceProperties.limits.lineWidthGranularity; + score += deviceProperties.limits.strictLines; + score += deviceProperties.limits.standardSampleLocations; + score += deviceProperties.limits.optimalBufferCopyOffsetAlignment; + score += deviceProperties.limits.optimalBufferCopyRowPitchAlignment; + score += deviceProperties.limits.nonCoherentAtomSize; + } + // Application can't function without geometry shaders + if ( !deviceFeatures.geometryShader ) return deviceInfo; + // + { + const uf::stl::vector deviceExtensions = { + VK_KHR_SWAPCHAIN_EXTENSION_NAME + }; + uint32_t extensionCount; + vkEnumerateDeviceExtensionProperties( physicalDevice, nullptr, &extensionCount, nullptr ); + uf::stl::vector availableExtensions( extensionCount ); + vkEnumerateDeviceExtensionProperties( physicalDevice, nullptr, &extensionCount, availableExtensions.data() ); + std::set requiredExtensions( deviceExtensions.begin(), deviceExtensions.end() ); + + for ( const auto& extension : availableExtensions ) + requiredExtensions.erase( extension.extensionName ); + + if ( !requiredExtensions.empty() ) return deviceInfo; + } + // + { + VkSurfaceCapabilitiesKHR capabilities; + uf::stl::vector formats; + uf::stl::vector presentModes; + + vkGetPhysicalDeviceSurfaceCapabilitiesKHR( physicalDevice, device.surface, &capabilities ); + + uint32_t formatCount; + vkGetPhysicalDeviceSurfaceFormatsKHR(physicalDevice, device.surface, &formatCount, nullptr); + if ( formatCount != 0 ) { + formats.resize( formatCount ); + vkGetPhysicalDeviceSurfaceFormatsKHR( physicalDevice, device.surface, &formatCount, formats.data() ); + } + uint32_t presentModeCount; + vkGetPhysicalDeviceSurfacePresentModesKHR( physicalDevice, device.surface, &presentModeCount, nullptr ); + if ( presentModeCount != 0 ) { + presentModes.resize(presentModeCount); + vkGetPhysicalDeviceSurfacePresentModesKHR( physicalDevice, device.surface, &presentModeCount, presentModes.data() ); + } + if ( formats.empty() || presentModes.empty() ) return deviceInfo; + } + return deviceInfo; + } + #if UF_USE_OPENVR void VRInstanceExtensions( uf::stl::vector& requested ) { if ( !vr::VRCompositor() ) return; @@ -255,166 +420,11 @@ uint32_t ext::vulkan::Device::getMemoryType( uint32_t typeBits, VkMemoryProperty UF_EXCEPTION("Vulkan error: could not find a matching memory type"); } -int ext::vulkan::Device::rate( VkPhysicalDevice device ) { - VkPhysicalDeviceProperties deviceProperties; - vkGetPhysicalDeviceProperties(device, &deviceProperties); - - VkPhysicalDeviceFeatures deviceFeatures; - vkGetPhysicalDeviceFeatures(device, &deviceFeatures); - int score = 0; - // Discrete GPUs have a significant performance advantage - if (deviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) score += 1000; - { - score += deviceProperties.limits.maxImageDimension1D; - score += deviceProperties.limits.maxImageDimension2D; - score += deviceProperties.limits.maxImageDimension3D; - score += deviceProperties.limits.maxImageDimensionCube; - score += deviceProperties.limits.maxImageArrayLayers; - score += deviceProperties.limits.maxTexelBufferElements; - score += deviceProperties.limits.maxUniformBufferRange; - score += deviceProperties.limits.maxStorageBufferRange; - score += deviceProperties.limits.maxPushConstantsSize; - score += deviceProperties.limits.maxMemoryAllocationCount; - score += deviceProperties.limits.maxSamplerAllocationCount; - score += deviceProperties.limits.bufferImageGranularity; - score += deviceProperties.limits.sparseAddressSpaceSize; - score += deviceProperties.limits.maxBoundDescriptorSets; - score += deviceProperties.limits.maxPerStageDescriptorSamplers; - score += deviceProperties.limits.maxPerStageDescriptorUniformBuffers; - score += deviceProperties.limits.maxPerStageDescriptorStorageBuffers; - score += deviceProperties.limits.maxPerStageDescriptorSampledImages; - score += deviceProperties.limits.maxPerStageDescriptorStorageImages; - score += deviceProperties.limits.maxPerStageDescriptorInputAttachments; - score += deviceProperties.limits.maxPerStageResources; - score += deviceProperties.limits.maxDescriptorSetSamplers; - score += deviceProperties.limits.maxDescriptorSetUniformBuffers; - score += deviceProperties.limits.maxDescriptorSetUniformBuffersDynamic; - score += deviceProperties.limits.maxDescriptorSetStorageBuffers; - score += deviceProperties.limits.maxDescriptorSetStorageBuffersDynamic; - score += deviceProperties.limits.maxDescriptorSetSampledImages; - score += deviceProperties.limits.maxDescriptorSetStorageImages; - score += deviceProperties.limits.maxDescriptorSetInputAttachments; - score += deviceProperties.limits.maxVertexInputAttributes; - score += deviceProperties.limits.maxVertexInputBindings; - score += deviceProperties.limits.maxVertexInputAttributeOffset; - score += deviceProperties.limits.maxVertexInputBindingStride; - score += deviceProperties.limits.maxVertexOutputComponents; - score += deviceProperties.limits.maxTessellationGenerationLevel; - score += deviceProperties.limits.maxTessellationPatchSize; - score += deviceProperties.limits.maxTessellationControlPerVertexInputComponents; - score += deviceProperties.limits.maxTessellationControlPerVertexOutputComponents; - score += deviceProperties.limits.maxTessellationControlPerPatchOutputComponents; - score += deviceProperties.limits.maxTessellationControlTotalOutputComponents; - score += deviceProperties.limits.maxTessellationEvaluationInputComponents; - score += deviceProperties.limits.maxTessellationEvaluationOutputComponents; - score += deviceProperties.limits.maxGeometryShaderInvocations; - score += deviceProperties.limits.maxGeometryInputComponents; - score += deviceProperties.limits.maxGeometryOutputComponents; - score += deviceProperties.limits.maxGeometryOutputVertices; - score += deviceProperties.limits.maxGeometryTotalOutputComponents; - score += deviceProperties.limits.maxFragmentInputComponents; - score += deviceProperties.limits.maxFragmentOutputAttachments; - score += deviceProperties.limits.maxFragmentDualSrcAttachments; - score += deviceProperties.limits.maxFragmentCombinedOutputResources; - score += deviceProperties.limits.maxComputeSharedMemorySize; - score += deviceProperties.limits.maxComputeWorkGroupInvocations; - score += deviceProperties.limits.subPixelPrecisionBits; - score += deviceProperties.limits.subTexelPrecisionBits; - score += deviceProperties.limits.mipmapPrecisionBits; - score += deviceProperties.limits.maxDrawIndexedIndexValue; - score += deviceProperties.limits.maxDrawIndirectCount; - score += deviceProperties.limits.maxSamplerLodBias; - score += deviceProperties.limits.maxSamplerAnisotropy; - score += deviceProperties.limits.maxViewports; - score += deviceProperties.limits.viewportSubPixelBits; - score += deviceProperties.limits.minMemoryMapAlignment; - score += deviceProperties.limits.minTexelBufferOffsetAlignment; - score += deviceProperties.limits.minUniformBufferOffsetAlignment; - score += deviceProperties.limits.minStorageBufferOffsetAlignment; - score += deviceProperties.limits.minTexelOffset; - score += deviceProperties.limits.maxTexelOffset; - score += deviceProperties.limits.minTexelGatherOffset; - score += deviceProperties.limits.maxTexelGatherOffset; - score += deviceProperties.limits.minInterpolationOffset; - score += deviceProperties.limits.maxInterpolationOffset; - score += deviceProperties.limits.subPixelInterpolationOffsetBits; - score += deviceProperties.limits.maxFramebufferWidth; - score += deviceProperties.limits.maxFramebufferHeight; - score += deviceProperties.limits.maxFramebufferLayers; - score += deviceProperties.limits.framebufferColorSampleCounts; - score += deviceProperties.limits.framebufferDepthSampleCounts; - score += deviceProperties.limits.framebufferStencilSampleCounts; - score += deviceProperties.limits.framebufferNoAttachmentsSampleCounts; - score += deviceProperties.limits.maxColorAttachments; - score += deviceProperties.limits.sampledImageColorSampleCounts; - score += deviceProperties.limits.sampledImageIntegerSampleCounts; - score += deviceProperties.limits.sampledImageDepthSampleCounts; - score += deviceProperties.limits.sampledImageStencilSampleCounts; - score += deviceProperties.limits.storageImageSampleCounts; - score += deviceProperties.limits.maxSampleMaskWords; - score += deviceProperties.limits.timestampComputeAndGraphics; - score += deviceProperties.limits.timestampPeriod; - score += deviceProperties.limits.maxClipDistances; - score += deviceProperties.limits.maxCullDistances; - score += deviceProperties.limits.maxCombinedClipAndCullDistances; - score += deviceProperties.limits.discreteQueuePriorities; - score += deviceProperties.limits.pointSizeGranularity; - score += deviceProperties.limits.lineWidthGranularity; - score += deviceProperties.limits.strictLines; - score += deviceProperties.limits.standardSampleLocations; - score += deviceProperties.limits.optimalBufferCopyOffsetAlignment; - score += deviceProperties.limits.optimalBufferCopyRowPitchAlignment; - score += deviceProperties.limits.nonCoherentAtomSize; - } - // Application can't function without geometry shaders - if ( !deviceFeatures.geometryShader ) return 0; - // - { - const uf::stl::vector deviceExtensions = { - VK_KHR_SWAPCHAIN_EXTENSION_NAME - }; - uint32_t extensionCount; - vkEnumerateDeviceExtensionProperties( device, nullptr, &extensionCount, nullptr ); - uf::stl::vector availableExtensions( extensionCount ); - vkEnumerateDeviceExtensionProperties( device, nullptr, &extensionCount, availableExtensions.data() ); - std::set requiredExtensions( deviceExtensions.begin(), deviceExtensions.end() ); - - for ( const auto& extension : availableExtensions ) - requiredExtensions.erase( extension.extensionName ); - - if ( !requiredExtensions.empty() ) return 0; - } - // - { - VkSurfaceCapabilitiesKHR capabilities; - uf::stl::vector formats; - uf::stl::vector presentModes; - - vkGetPhysicalDeviceSurfaceCapabilitiesKHR( device, this->surface, &capabilities ); - - uint32_t formatCount; - vkGetPhysicalDeviceSurfaceFormatsKHR(device, this->surface, &formatCount, nullptr); - if ( formatCount != 0 ) { - formats.resize( formatCount ); - vkGetPhysicalDeviceSurfaceFormatsKHR( device, this->surface, &formatCount, formats.data() ); - } - uint32_t presentModeCount; - vkGetPhysicalDeviceSurfacePresentModesKHR( device, this->surface, &presentModeCount, nullptr ); - if ( presentModeCount != 0 ) { - presentModes.resize(presentModeCount); - vkGetPhysicalDeviceSurfacePresentModesKHR( device, this->surface, &presentModeCount, presentModes.data() ); - } - if ( formats.empty() || presentModes.empty() ) return 0; - } - if ( settings::gpuID != -1 && deviceProperties.deviceID == settings::gpuID ) { - score = std::numeric_limits::max(); - } - UF_MSG_VALIDATION("Device name: " << deviceProperties.deviceName << " (" << deviceProperties.deviceID << ") has a score of " << score); - return score; -} - VkCommandBuffer ext::vulkan::Device::createCommandBuffer( VkCommandBufferLevel level, bool begin ){ - VkCommandBufferAllocateInfo cmdBufAllocateInfo = ext::vulkan::initializers::commandBufferAllocateInfo( getCommandPool(QueueEnum::TRANSFER), level, 1 ); + return createCommandBuffer( level, QueueEnum::TRANSFER, begin ); +} +VkCommandBuffer ext::vulkan::Device::createCommandBuffer( VkCommandBufferLevel level, QueueEnum queue, bool begin ){ + VkCommandBufferAllocateInfo cmdBufAllocateInfo = ext::vulkan::initializers::commandBufferAllocateInfo( getCommandPool(queue), level, 1 ); VkCommandBuffer commandBuffer; VK_CHECK_RESULT( vkAllocateCommandBuffers( logicalDevice, &cmdBufAllocateInfo, &commandBuffer ) ); @@ -427,6 +437,9 @@ VkCommandBuffer ext::vulkan::Device::createCommandBuffer( VkCommandBufferLevel l } void ext::vulkan::Device::flushCommandBuffer( VkCommandBuffer commandBuffer, bool free ) { + return flushCommandBuffer( commandBuffer, QueueEnum::TRANSFER, free ); +} +void ext::vulkan::Device::flushCommandBuffer( VkCommandBuffer commandBuffer, QueueEnum queue, bool free ) { if ( commandBuffer == VK_NULL_HANDLE ) return; VK_CHECK_RESULT( vkEndCommandBuffer( commandBuffer ) ); @@ -439,18 +452,12 @@ void ext::vulkan::Device::flushCommandBuffer( VkCommandBuffer commandBuffer, boo VkFenceCreateInfo fenceInfo = ext::vulkan::initializers::fenceCreateInfo(VK_FLAGS_NONE); VkFence fence; VK_CHECK_RESULT(vkCreateFence(logicalDevice, &fenceInfo, nullptr, &fence)); - - // Submit to the queue - VK_CHECK_RESULT(vkQueueSubmit( getQueue( QueueEnum::TRANSFER ), 1, &submitInfo, fence)); - // vkQueueSubmit(device.queues.transfer, 1, &submitInfo, fence); - // Wait for the fence to signal that command buffer has finished executing + VK_CHECK_RESULT(vkQueueSubmit( getQueue( queue ), 1, &submitInfo, fence)); VK_CHECK_RESULT(vkWaitForFences(logicalDevice, 1, &fence, VK_TRUE, VK_DEFAULT_FENCE_TIMEOUT)); - vkDestroyFence(logicalDevice, fence, nullptr); - - if ( free ) vkFreeCommandBuffers(logicalDevice, getCommandPool( QueueEnum::TRANSFER ), 1, &commandBuffer); + if ( free ) vkFreeCommandBuffers(logicalDevice, getCommandPool( queue ), 1, &commandBuffer); } - +#if 0 VkResult ext::vulkan::Device::createBuffer( VkBufferUsageFlags usage, VkMemoryPropertyFlags memoryProperties, VkDeviceSize size, VkBuffer* buffer, VkDeviceMemory* memory, const void* data ) { // Create the buffer handle VkBufferCreateInfo bufferCreateInfo = ext::vulkan::initializers::bufferCreateInfo(usage, size); @@ -488,7 +495,7 @@ VkResult ext::vulkan::Device::createBuffer( VkBufferUsageFlags usage, VkMemoryPr VK_CHECK_RESULT(vkBindBufferMemory(logicalDevice, *buffer, *memory, 0)); return VK_SUCCESS; } - +#endif VkResult ext::vulkan::Device::createBuffer( VkBufferUsageFlags usage, VkMemoryPropertyFlags memoryProperties, @@ -701,33 +708,48 @@ void ext::vulkan::Device::initialize() { window->createSurface( instance, surface ); } // Create physical device + + uint32_t deviceCount = 0; + uf::stl::vector physicalDevices; + uf::stl::vector<::DeviceInfo> deviceInfos; // defined outside if we want to "multi"-gpu { - uint32_t deviceCount = 0; vkEnumeratePhysicalDevices( this->instance, &deviceCount, nullptr ); if ( deviceCount == 0 ) UF_EXCEPTION("Vulkan error: failed to find GPUs with Vulkan support!"); - uf::stl::vector devices(deviceCount); - vkEnumeratePhysicalDevices( this->instance, &deviceCount, devices.data() ); - - int bestScore = 0; - for ( const VkPhysicalDevice& device : devices ) { - int score = rate( device ); - if ( score <= bestScore ) continue; - bestScore = score; - physicalDevice = device; - } - /* - // Use an ordered map to automatically sort candidates by increasing score - std::multimap candidates; - for ( const VkPhysicalDevice& device : devices ) { - int score = rate( device ); - candidates.insert( std::make_pair(score, device) ); + deviceInfos.reserve(deviceCount); + physicalDevices.resize(deviceCount); + + vkEnumeratePhysicalDevices( this->instance, &deviceCount, physicalDevices.data() ); + + size_t bestDeviceIndex = 0; + for ( size_t i = 0; i < deviceCount; ++i ) { + auto& deviceInfo = deviceInfos.emplace_back( rate(*this, physicalDevices[i]) ); + UF_MSG_VALIDATION("[" << i << "] " + "Found device: " << deviceInfo.properties.deviceName << " (" + "score: " << deviceInfo.score << " | " + "device ID: " << deviceInfo.properties.deviceID << " | " + "vendor ID: " << deviceInfo.properties.vendorID << " | " + "API version: " << deviceInfo.properties.apiVersion << " | " + "driver version: " << deviceInfo.properties.driverVersion << ")" + ); + if ( settings::experimental::enableMultiGPU && deviceInfos[bestDeviceIndex].properties.vendorID != deviceInfo.properties.vendorID ) settings::experimental::enableMultiGPU = false; + if ( deviceInfos[bestDeviceIndex].score >= deviceInfo.score ) continue; + bestDeviceIndex = i; } - // Check if the best candidate is suitable at all - if ( candidates.rbegin()->first <= 0 ) UF_EXCEPTION("Vulkan error: failed to find a suitable GPU!"); - this->physicalDevice = candidates.rbegin()->second; - */ + if ( 0 <= ext::vulkan::settings::gpuID && ext::vulkan::settings::gpuID < deviceCount ) { + bestDeviceIndex = ext::vulkan::settings::gpuID; + } + auto& deviceInfo = deviceInfos[bestDeviceIndex]; + this->physicalDevice = deviceInfo.handle; + + UF_MSG_VALIDATION("Using device #" << bestDeviceIndex << " (" + "score: " << deviceInfo.score << " | " + "device ID: " << deviceInfo.properties.deviceID << " | " + "vendor ID: " << deviceInfo.properties.vendorID << " | " + "API version: " << deviceInfo.properties.apiVersion << " | " + "driver version: " << deviceInfo.properties.driverVersion << ")" + ); } // Update properties { @@ -738,7 +760,6 @@ void ext::vulkan::Device::initialize() { // Memory properties are used regularly for creating all kinds of buffers vkGetPhysicalDeviceMemoryProperties( this->physicalDevice, &memoryProperties ); } - UF_MSG_VALIDATION("Using device " << properties.deviceName << " (" << properties.deviceID << ")"); { properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -771,7 +792,7 @@ void ext::vulkan::Device::initialize() { // Create logical device { bool useSwapChain = true; - VkQueueFlags requestedQueueTypes = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; + VkQueueFlags requestedQueueTypes = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; // | VK_QUEUE_TRANSFER_BIT; uf::stl::vector requestedExtensions; requestedExtensions.insert( requestedExtensions.end(), ext::vulkan::settings::requestedDeviceExtensions.begin(), ext::vulkan::settings::requestedDeviceExtensions.end() ); #if UF_USE_OPENVR @@ -861,11 +882,19 @@ void ext::vulkan::Device::initialize() { enableRequestedDeviceFeatures( *this ); + + VkDeviceCreateInfo deviceCreateInfo = {}; deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; deviceCreateInfo.queueCreateInfoCount = static_cast(queueCreateInfos.size());; deviceCreateInfo.pQueueCreateInfos = queueCreateInfos.data(); - deviceCreateInfo.pEnabledFeatures = &enabledFeatures; + // deviceCreateInfo.pEnabledFeatures = &enabledFeatures; + deviceCreateInfo.pEnabledFeatures = nullptr; + + VkDeviceGroupDeviceCreateInfo groupDeviceCreateInfo = {}; + groupDeviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_GROUP_DEVICE_CREATE_INFO; + groupDeviceCreateInfo.physicalDeviceCount = physicalDevices.size(); + groupDeviceCreateInfo.pPhysicalDevices = physicalDevices.data(); if ( deviceExtensions.size() > 0 ) { deviceCreateInfo.enabledExtensionCount = (uint32_t) deviceExtensions.size(); @@ -876,14 +905,13 @@ void ext::vulkan::Device::initialize() { VkPhysicalDeviceDescriptorIndexingFeatures descriptorIndexingFeatures{}; VkPhysicalDeviceShaderDrawParametersFeatures shaderDrawParametersFeatures{}; VkPhysicalDeviceRobustness2FeaturesEXT robustnessFeatures{}; - { - deviceCreateInfo.pEnabledFeatures = nullptr; - deviceCreateInfo.pNext = &physicalDeviceFeatures2; - } + VkPhysicalDeviceBufferDeviceAddressFeatures bufferDeviceAddresFeatures{}; + VkPhysicalDeviceRayTracingPipelineFeaturesKHR rayTracingPipelineFeatures{}; + VkPhysicalDeviceAccelerationStructureFeaturesKHR accelerationStructureFeatures{}; + { physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; physicalDeviceFeatures2.features = enabledFeatures; - physicalDeviceFeatures2.pNext = &descriptorIndexingFeatures; } { descriptorIndexingFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; @@ -891,21 +919,43 @@ void ext::vulkan::Device::initialize() { descriptorIndexingFeatures.shaderStorageImageArrayNonUniformIndexing = VK_TRUE; descriptorIndexingFeatures.runtimeDescriptorArray = VK_TRUE; descriptorIndexingFeatures.descriptorBindingVariableDescriptorCount = VK_TRUE; - descriptorIndexingFeatures.pNext = &shaderDrawParametersFeatures; } { shaderDrawParametersFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES; shaderDrawParametersFeatures.shaderDrawParameters = VK_TRUE; - shaderDrawParametersFeatures.pNext = &robustnessFeatures; } { robustnessFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; robustnessFeatures.nullDescriptor = VK_TRUE; } - - if ( vkCreateDevice( this->physicalDevice, &deviceCreateInfo, nullptr, &this->logicalDevice) != VK_SUCCESS ) { - UF_EXCEPTION("Vulkan error: failed to create logical device!"); + { + bufferDeviceAddresFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES; + bufferDeviceAddresFeatures.bufferDeviceAddress = VK_TRUE; } + { + rayTracingPipelineFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR; + rayTracingPipelineFeatures.rayTracingPipeline = VK_TRUE; + } + { + accelerationStructureFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR; + accelerationStructureFeatures.accelerationStructure = VK_TRUE; + } + + deviceCreateInfo.pNext = &physicalDeviceFeatures2; + physicalDeviceFeatures2.pNext = &descriptorIndexingFeatures; + descriptorIndexingFeatures.pNext = &shaderDrawParametersFeatures; + shaderDrawParametersFeatures.pNext = &robustnessFeatures; + robustnessFeatures.pNext = &bufferDeviceAddresFeatures; + bufferDeviceAddresFeatures.pNext = &rayTracingPipelineFeatures; + rayTracingPipelineFeatures.pNext = &accelerationStructureFeatures; + + if ( settings::experimental::enableMultiGPU ) { + UF_MSG_DEBUG("Multiple devices supported, using " << groupDeviceCreateInfo.physicalDeviceCount << " devices..."); + accelerationStructureFeatures.pNext = &groupDeviceCreateInfo; + } + + if ( vkCreateDevice( this->physicalDevice, &deviceCreateInfo, nullptr, &this->logicalDevice) != VK_SUCCESS ) UF_EXCEPTION("Vulkan error: failed to create logical device!"); + { ext::json::Value payload = ext::json::array(); for ( auto* c_str : deviceExtensions ) payload.emplace_back( uf::stl::string(c_str) ); @@ -949,17 +999,16 @@ void ext::vulkan::Device::initialize() { i++; } + UF_MSG_VALIDATION("Graphics queue: " << device.queueFamilyIndices.graphics); + UF_MSG_VALIDATION("Compute queue: " << device.queueFamilyIndices.compute); + UF_MSG_VALIDATION("Transfer queue: " << device.queueFamilyIndices.transfer); + UF_MSG_VALIDATION("Present queue: " << device.queueFamilyIndices.present); + device.queueFamilyIndices.present = presentQueueNodeIndex; getQueue( QueueEnum::GRAPHICS ); getQueue( QueueEnum::PRESENT ); getQueue( QueueEnum::COMPUTE ); getQueue( QueueEnum::TRANSFER ); - /* - vkGetDeviceQueue( device, device.queueFamilyIndices.graphics, 0, &queues.graphics[std::this_thread::get_id()] ); - vkGetDeviceQueue( device, device.queueFamilyIndices.present, 0, &queues.present[std::this_thread::get_id()] ); - vkGetDeviceQueue( device, device.queueFamilyIndices.compute, 0, &queues.compute[std::this_thread::get_id()] ); - vkGetDeviceQueue( device, device.queueFamilyIndices.transfer, 0, &queues.transfer[std::this_thread::get_id()] ); - */ } // Set formats { @@ -1069,10 +1118,24 @@ void ext::vulkan::Device::initialize() { allocatorInfo.physicalDevice = physicalDevice; allocatorInfo.instance = instance; allocatorInfo.device = logicalDevice; + allocatorInfo.flags = VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT; allocatorInfo.pVulkanFunctions = &vulkanFunctions; vmaCreateAllocator(&allocatorInfo, &allocator); } + + { + vkGetBufferDeviceAddressKHR = reinterpret_cast(vkGetDeviceProcAddr(device, "vkGetBufferDeviceAddressKHR")); + vkCmdBuildAccelerationStructuresKHR = reinterpret_cast(vkGetDeviceProcAddr(device, "vkCmdBuildAccelerationStructuresKHR")); + vkBuildAccelerationStructuresKHR = reinterpret_cast(vkGetDeviceProcAddr(device, "vkBuildAccelerationStructuresKHR")); + vkCreateAccelerationStructureKHR = reinterpret_cast(vkGetDeviceProcAddr(device, "vkCreateAccelerationStructureKHR")); + vkDestroyAccelerationStructureKHR = reinterpret_cast(vkGetDeviceProcAddr(device, "vkDestroyAccelerationStructureKHR")); + vkGetAccelerationStructureBuildSizesKHR = reinterpret_cast(vkGetDeviceProcAddr(device, "vkGetAccelerationStructureBuildSizesKHR")); + vkGetAccelerationStructureDeviceAddressKHR = reinterpret_cast(vkGetDeviceProcAddr(device, "vkGetAccelerationStructureDeviceAddressKHR")); + vkCmdTraceRaysKHR = reinterpret_cast(vkGetDeviceProcAddr(device, "vkCmdTraceRaysKHR")); + vkGetRayTracingShaderGroupHandlesKHR = reinterpret_cast(vkGetDeviceProcAddr(device, "vkGetRayTracingShaderGroupHandlesKHR")); + vkCreateRayTracingPipelinesKHR = reinterpret_cast(vkGetDeviceProcAddr(device, "vkCreateRayTracingPipelinesKHR")); + } } void ext::vulkan::Device::destroy() { diff --git a/engine/src/ext/vulkan/graphic.cpp b/engine/src/ext/vulkan/graphic.cpp index a116c90a..469fc994 100644 --- a/engine/src/ext/vulkan/graphic.cpp +++ b/engine/src/ext/vulkan/graphic.cpp @@ -32,9 +32,6 @@ void ext::vulkan::Pipeline::initialize( const Graphic& graphic, const GraphicDes assert( shaders.size() > 0 ); uint32_t subpass = descriptor.subpass; - - RenderMode& renderMode = ext::vulkan::getRenderMode( descriptor.renderMode, true ); - auto& renderTarget = renderMode.getRenderTarget( descriptor.renderTarget ); uf::stl::vector descriptorSetLayoutBindings; uf::stl::vector pushConstantRanges; @@ -103,6 +100,87 @@ void ext::vulkan::Pipeline::initialize( const Graphic& graphic, const GraphicDes pPipelineLayoutCreateInfo.pPushConstantRanges = pushConstantRanges.data(); VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pPipelineLayoutCreateInfo, nullptr, &pipelineLayout)); } + // raytrace + { + uf::stl::vector shaderGroups; + for ( auto* shader : shaders ) { + if ( shader->descriptor.stage != VK_SHADER_STAGE_RAYGEN_BIT_KHR && shader->descriptor.stage != VK_SHADER_STAGE_MISS_BIT_KHR && shader->descriptor.stage != VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR ) continue; + + size_t shaderID = static_cast(shaderDescriptors.size()); + bool isHit = shader->descriptor.stage & (VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR | VK_SHADER_STAGE_INTERSECTION_BIT_KHR); + + auto& shaderGroup = shaderGroups.emplace_back(); + shaderGroup.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + shaderGroup.type = !isHit ? VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR : VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR; + shaderGroup.generalShader = !isHit ? shaderID : VK_SHADER_UNUSED_KHR; + + shaderGroup.closestHitShader = (shader->descriptor.stage & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR) ? shaderID : VK_SHADER_UNUSED_KHR; + shaderGroup.anyHitShader = (shader->descriptor.stage & VK_SHADER_STAGE_ANY_HIT_BIT_KHR) ? shaderID : VK_SHADER_UNUSED_KHR; + shaderGroup.intersectionShader = (shader->descriptor.stage & VK_SHADER_STAGE_INTERSECTION_BIT_KHR) ? shaderID : VK_SHADER_UNUSED_KHR; + + shaderDescriptors.emplace_back(shader->descriptor); + } + + if ( !shaderGroups.empty() ) { + VkRayTracingPipelineCreateInfoKHR rayTracingPipelineCI{}; + rayTracingPipelineCI.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR; + rayTracingPipelineCI.stageCount = static_cast(shaderDescriptors.size()); + rayTracingPipelineCI.pStages = shaderDescriptors.data(); + rayTracingPipelineCI.groupCount = static_cast(shaderGroups.size()); + rayTracingPipelineCI.pGroups = shaderGroups.data(); + rayTracingPipelineCI.maxPipelineRayRecursionDepth = 1; + rayTracingPipelineCI.layout = pipelineLayout; + VK_CHECK_RESULT(vkCreateRayTracingPipelinesKHR(device, VK_NULL_HANDLE, VK_NULL_HANDLE, 1, &rayTracingPipelineCI, nullptr, &pipeline)); + + VkPhysicalDeviceRayTracingPipelinePropertiesKHR rayTracingPipelineProperties{}; + rayTracingPipelineProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2 deviceProperties2{}; + deviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + deviceProperties2.pNext = &rayTracingPipelineProperties; + + vkGetPhysicalDeviceProperties2(device.physicalDevice, &deviceProperties2); + + const uint32_t handleSize = rayTracingPipelineProperties.shaderGroupHandleSize; + const uint32_t handleSizeAligned = ALIGNED_SIZE(rayTracingPipelineProperties.shaderGroupHandleSize, rayTracingPipelineProperties.shaderGroupHandleAlignment); + const uint32_t groupCount = static_cast(shaderGroups.size()); + const uint32_t sbtSize = groupCount * handleSizeAligned; + const VkBufferUsageFlags bufferUsageFlags = VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + + std::vector shaderHandleStorage(sbtSize); + VK_CHECK_RESULT(vkGetRayTracingShaderGroupHandlesKHR(device, pipeline, 0, groupCount, sbtSize, shaderHandleStorage.data())); + + requestedAlignment = rayTracingPipelineProperties.shaderGroupBaseAlignment; + size_t raygenBufferIndex = initializeBuffer((const void*) (shaderHandleStorage.data() + handleSizeAligned * 0), handleSize, bufferUsageFlags); + size_t raymissBufferIndex = initializeBuffer((const void*) (shaderHandleStorage.data() + handleSizeAligned * 1), handleSize, bufferUsageFlags); + size_t rayhitBufferIndex = initializeBuffer((const void*) (shaderHandleStorage.data() + handleSizeAligned * 2), handleSize, bufferUsageFlags); + requestedAlignment = 0; + + Buffer raygenBuffer; raygenBuffer.aliasBuffer( buffers[raygenBufferIndex] ); + Buffer raymissBuffer; raymissBuffer.aliasBuffer( buffers[raymissBufferIndex] ); + Buffer rayhitBuffer; rayhitBuffer.aliasBuffer( buffers[rayhitBufferIndex] ); + + auto& raygenShaderSbtEntry = sbtEntries.emplace_back(); + raygenShaderSbtEntry.deviceAddress = raygenBuffer.getAddress(); + raygenShaderSbtEntry.stride = handleSizeAligned; + raygenShaderSbtEntry.size = handleSizeAligned; + + auto& raymissShaderSbtEntry = sbtEntries.emplace_back(); + raymissShaderSbtEntry.deviceAddress = raymissBuffer.getAddress(); + raymissShaderSbtEntry.stride = handleSizeAligned; + raymissShaderSbtEntry.size = handleSizeAligned; + + auto& rayhitShaderSbtEntry = sbtEntries.emplace_back(); + rayhitShaderSbtEntry.deviceAddress = rayhitBuffer.getAddress(); + rayhitShaderSbtEntry.stride = handleSizeAligned; + rayhitShaderSbtEntry.size = handleSizeAligned; + + auto& raycallShaderSbtEntry = sbtEntries.emplace_back(); + + return; + } + } + // Compute for ( auto* shaderPointer : shaders ) { auto& shader = *shaderPointer; @@ -120,6 +198,9 @@ void ext::vulkan::Pipeline::initialize( const Graphic& graphic, const GraphicDes } // Graphic { + RenderMode& renderMode = ext::vulkan::getRenderMode( descriptor.renderMode, true ); + auto& renderTarget = renderMode.getRenderTarget( descriptor.renderTarget ); + VkPipelineInputAssemblyStateCreateInfo inputAssemblyState = ext::vulkan::initializers::pipelineInputAssemblyStateCreateInfo( descriptor.topology, 0, @@ -280,7 +361,7 @@ void ext::vulkan::Pipeline::initialize( const Graphic& graphic, const GraphicDes PIPELINE_INITIALIZATION_INVALID: VK_DEBUG_VALIDATION_MESSAGE("Pipeline initialization invalid, updating next tick..."); - uf::thread::queue( uf::thread::get("Main"), [&]{ + uf::thread::queue( uf::thread::get(uf::thread::mainThreadName), [&]{ this->initialize( graphic, descriptor ); }); return; @@ -293,9 +374,12 @@ void ext::vulkan::Pipeline::record( const Graphic& graphic, const GraphicDescrip auto shaders = getShaders( graphic.material.shaders ); for ( auto* shader : shaders ) { if ( shader->descriptor.stage == VK_SHADER_STAGE_COMPUTE_BIT ) bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE; + if ( shader->descriptor.stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR ) bindPoint = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; + if ( shader->descriptor.stage == VK_SHADER_STAGE_MISS_BIT_KHR ) bindPoint = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; + if ( shader->descriptor.stage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR ) bindPoint = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; #if 1 if ( shader->metadata.definitions.pushConstants.count("PushConstant") > 0 ) { - if ( shader->descriptor.stage == VK_SHADER_STAGE_VERTEX_BIT || shader->descriptor.stage == VK_SHADER_STAGE_COMPUTE_BIT ) { + if ( shader->descriptor.stage == VK_SHADER_STAGE_VERTEX_BIT || shader->descriptor.stage == VK_SHADER_STAGE_COMPUTE_BIT || shader->descriptor.stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR ) { struct PushConstant { uint32_t pass; uint32_t draw; @@ -312,26 +396,6 @@ void ext::vulkan::Pipeline::record( const Graphic& graphic, const GraphicDescrip vkCmdPushConstants( commandBuffer, pipelineLayout, shader->descriptor.stage, 0, size, data ); } } - /* - size_t offset = 0; - for ( auto& pushConstant : shader->pushConstants ) { - if ( shader->metadata.definitions.pushConstants.count("PushConstant") > 0 ) { - if ( shader->descriptor.stage == VK_SHADER_STAGE_VERTEX_BIT ) { - struct PushConstant { - uint32_t pass; - uint32_t draw; - } pushConstant = { pass, draw }; - ( commandBuffer, pipelineLayout, shader->descriptor.stage, 0, sizeof(pushConstant), &pushConstant ); - } - } else { - size_t len = pushConstant.data().len; - void* pointer = pushConstant.data().data; - if ( len > 0 && pointer ) { - vkCmdPushConstants( commandBuffer, pipelineLayout, shader->descriptor.stage, 0, len, pointer ); - } - } - } - */ } // Bind descriptor sets describing shader binding points vkCmdBindDescriptorSets(commandBuffer, bindPoint, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr); @@ -339,7 +403,19 @@ void ext::vulkan::Pipeline::record( const Graphic& graphic, const GraphicDescrip // The pipeline (state object) contains all states of the rendering pipeline, binding it will set all the states specified at pipeline creation time vkCmdBindPipeline(commandBuffer, bindPoint, pipeline); - if ( bindPoint == VK_PIPELINE_BIND_POINT_COMPUTE && descriptor.inputs.dispatch.x != 0 && descriptor.inputs.dispatch.y != 0 && descriptor.inputs.dispatch.z != 0 ) { + if ( bindPoint == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR ) { + vkCmdTraceRaysKHR( + commandBuffer, + &sbtEntries[0], + &sbtEntries[1], + &sbtEntries[2], + &sbtEntries[3], + descriptor.inputs.width ? descriptor.inputs.width : ext::vulkan::settings::width, + descriptor.inputs.height ? descriptor.inputs.height : ext::vulkan::settings::height, + 1 + ); + // UF_MSG_DEBUG("Target render mode: " << descriptor.renderMode << " | " << bindPoint << " " << pipeline << " " << pipelineLayout << " " << descriptorSet << " " << sbtEntries.size() << " " << descriptor.inputs.width << " " << descriptor.inputs.height ); + } else if ( bindPoint == VK_PIPELINE_BIND_POINT_COMPUTE && descriptor.inputs.dispatch.x != 0 && descriptor.inputs.dispatch.y != 0 && descriptor.inputs.dispatch.z != 0 ) { vkCmdDispatch(commandBuffer, descriptor.inputs.dispatch.x, descriptor.inputs.dispatch.y, descriptor.inputs.dispatch.z); } } @@ -361,6 +437,7 @@ void ext::vulkan::Pipeline::update( const Graphic& graphic, const GraphicDescrip struct Infos { uf::stl::vector uniform; uf::stl::vector storage; + uf::stl::vector accelerationStructure; uf::stl::vector image; uf::stl::vector image2D; @@ -382,16 +459,25 @@ void ext::vulkan::Pipeline::update( const Graphic& graphic, const GraphicDescrip for ( auto& buffer : renderMode.buffers ) { if ( buffer.usage & uf::renderer::enums::Buffer::UNIFORM ) infos.uniform.emplace_back(buffer.descriptor); if ( buffer.usage & uf::renderer::enums::Buffer::STORAGE ) infos.storage.emplace_back(buffer.descriptor); + if ( buffer.usage & uf::renderer::enums::Buffer::ACCELERATION_STRUCTURE ) infos.accelerationStructure.emplace_back(buffer.descriptor); } // add per-shader buffers for ( auto& buffer : shader->buffers ) { if ( buffer.usage & uf::renderer::enums::Buffer::UNIFORM ) infos.uniform.emplace_back(buffer.descriptor); if ( buffer.usage & uf::renderer::enums::Buffer::STORAGE ) infos.storage.emplace_back(buffer.descriptor); + if ( buffer.usage & uf::renderer::enums::Buffer::ACCELERATION_STRUCTURE ) infos.accelerationStructure.emplace_back(buffer.descriptor); } // add per-pipeline buffers for ( auto& buffer : this->buffers ) { if ( buffer.usage & uf::renderer::enums::Buffer::UNIFORM ) infos.uniform.emplace_back(buffer.descriptor); if ( buffer.usage & uf::renderer::enums::Buffer::STORAGE ) infos.storage.emplace_back(buffer.descriptor); + if ( buffer.usage & uf::renderer::enums::Buffer::ACCELERATION_STRUCTURE ) infos.accelerationStructure.emplace_back(buffer.descriptor); + } + // add per-graphics buffers + for ( auto& buffer : graphic.buffers ) { + if ( buffer.usage & uf::renderer::enums::Buffer::UNIFORM ) infos.uniform.emplace_back(buffer.descriptor); + if ( buffer.usage & uf::renderer::enums::Buffer::STORAGE ) infos.storage.emplace_back(buffer.descriptor); + if ( buffer.usage & uf::renderer::enums::Buffer::ACCELERATION_STRUCTURE ) infos.accelerationStructure.emplace_back(buffer.descriptor); } if ( descriptor.subpass < renderTarget.passes.size() ) { @@ -419,6 +505,23 @@ void ext::vulkan::Pipeline::update( const Graphic& graphic, const GraphicDescrip infos.sampler.emplace_back(sampler.descriptor.info); } + // + /* + uf::stl::vector accelerationStructureInfos; + { + auto& descriptorAccelerationStructureInfo = accelerationStructureInfos.emplace_back(); + descriptorAccelerationStructureInfo.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; + descriptorAccelerationStructureInfo.accelerationStructureCount = 1; + descriptorAccelerationStructureInfo.pAccelerationStructures = &graphic.accelerationStructures.top.handle; + } + */ + + VkWriteDescriptorSetAccelerationStructureKHR descriptorAccelerationStructureInfo{}; + descriptorAccelerationStructureInfo.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; + descriptorAccelerationStructureInfo.accelerationStructureCount = 1; + descriptorAccelerationStructureInfo.pAccelerationStructures = &graphic.accelerationStructures.top.handle; + + // check if we can even consume that many infos size_t consumes = 0; for ( auto& layout : shader->descriptorSetLayoutBindings ) { @@ -464,6 +567,7 @@ void ext::vulkan::Pipeline::update( const Graphic& graphic, const GraphicDescrip auto uniformBufferInfo = infos.uniform.begin(); auto storageBufferInfo = infos.storage.begin(); + auto accelerationStructureInfo = infos.accelerationStructure.begin(); auto imageInfo = infos.image.begin(); auto image2DInfo = infos.image2D.begin(); @@ -578,6 +682,19 @@ void ext::vulkan::Pipeline::update( const Graphic& graphic, const GraphicDescrip )); storageBufferInfo += layout.descriptorCount; } break; + case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { + UF_ASSERT_BREAK_MSG( accelerationStructureInfo != infos.accelerationStructure.end(), "Filename: " << shader->filename << "\tCount: " << layout.descriptorCount ) + auto& writeDescriptorSet = writeDescriptorSets.emplace_back(ext::vulkan::initializers::writeDescriptorSet( + descriptorSet, + layout.descriptorType, + layout.binding, + &(*accelerationStructureInfo), + layout.descriptorCount + )); + writeDescriptorSet.pNext = &descriptorAccelerationStructureInfo; + + accelerationStructureInfo += layout.descriptorCount; + } break; } } } @@ -632,17 +749,17 @@ void ext::vulkan::Pipeline::update( const Graphic& graphic, const GraphicDescrip } { - // bool locked = renderMode.tryMutex(); + bool locked = renderMode.tryMutex(); renderMode.rebuild = true; vkUpdateDescriptorSets( *device, writeDescriptorSets.size(), writeDescriptorSets.data(), 0, NULL ); - // if ( locked ) renderMode.unlockMutex(); + if ( locked ) renderMode.unlockMutex(); } return; PIPELINE_UPDATE_INVALID: // graphic.process = false; VK_DEBUG_VALIDATION_MESSAGE("Pipeline update invalid, updating next tick..."); - uf::thread::queue( uf::thread::get("Main"), [&]{ + uf::thread::queue( uf::thread::get(uf::thread::mainThreadName), [&]{ this->update( graphic, descriptor ); }); return; @@ -667,7 +784,13 @@ void ext::vulkan::Pipeline::destroy() { descriptorSetLayout = VK_NULL_HANDLE; } - if ( settings::experimental::dedicatedThread ) ext::vulkan::states::rebuild = true; +// if ( settings::experimental::dedicatedThread ) ext::vulkan::states::rebuild = true; +/* + if ( ext::vulkan::hasRenderMode(descriptor.renderMode, true) ) { + RenderMode& renderMode = ext::vulkan::getRenderMode(descriptor.renderMode, true); + renderMode.rebuild = true; + } +*/ } uf::stl::vector ext::vulkan::Pipeline::getShaders( uf::stl::vector& shaders ) { uf::stl::unordered_map map; @@ -741,19 +864,19 @@ void ext::vulkan::Material::attachShader( const uf::stl::string& filename, VkSha uf::stl::string type = "unknown"; switch ( stage ) { case VK_SHADER_STAGE_VERTEX_BIT: type = "vertex"; break; - case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: type = "tessellation_control"; break; - case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: type = "tessellation_evaluation"; break; + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: type = "tessellation:control"; break; + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: type = "tessellation:evaluation"; break; case VK_SHADER_STAGE_GEOMETRY_BIT: type = "geometry"; break; case VK_SHADER_STAGE_FRAGMENT_BIT: type = "fragment"; break; case VK_SHADER_STAGE_COMPUTE_BIT: type = "compute"; break; - case VK_SHADER_STAGE_ALL_GRAPHICS: type = "all_graphics"; break; + case VK_SHADER_STAGE_ALL_GRAPHICS: type = "all:graphics"; break; case VK_SHADER_STAGE_ALL: type = "all"; break; - case VK_SHADER_STAGE_RAYGEN_BIT_KHR: type = "raygen"; break; - case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: type = "any_hit"; break; - case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: type = "closest_hit"; break; - case VK_SHADER_STAGE_MISS_BIT_KHR: type = "miss"; break; - case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: type = "intersection"; break; - case VK_SHADER_STAGE_CALLABLE_BIT_KHR: type = "callable"; break; + case VK_SHADER_STAGE_RAYGEN_BIT_KHR: type = "ray:gen"; break; + case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: type = "ray:hit:any"; break; + case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: type = "ray:hit:closest"; break; + case VK_SHADER_STAGE_MISS_BIT_KHR: type = "ray:miss"; break; + case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: type = "ray:intersection"; break; + case VK_SHADER_STAGE_CALLABLE_BIT_KHR: type = "ray:callable"; break; } shader.metadata.pipeline = pipeline; shader.metadata.type = type; @@ -802,7 +925,7 @@ void ext::vulkan::Graphic::initializePipeline() { initializePipeline( this->descriptor, false ); } ext::vulkan::Pipeline& ext::vulkan::Graphic::initializePipeline( const GraphicDescriptor& descriptor, bool update ) { - auto& pipeline = pipelines[descriptor.hash()]; + auto& pipeline = pipelines[descriptor]; pipeline.initialize(*this, descriptor); pipeline.update(*this, descriptor); @@ -836,15 +959,16 @@ void ext::vulkan::Graphic::initializeMesh( uf::Mesh& mesh, bool buffer ) { }; uf::stl::vector queue; descriptor.inputs.bufferOffset = buffers.empty() ? 0 : buffers.size() - 1; + VkBufferUsageFlags baseUsage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR; #define PARSE_ATTRIBUTE(i, usage) {\ auto& buffer = mesh.buffers[i];\ if ( queue.size() <= i ) queue.resize( i );\ - if ( !buffer.empty() ) queue.emplace_back(Queue{ (void*) buffer.data(), buffer.size(), usage });\ + if ( !buffer.empty() ) queue.emplace_back(Queue{ (void*) buffer.data(), buffer.size(), usage | baseUsage });\ } #define PARSE_INPUT(name, usage){\ - if ( mesh.isInterleaved( mesh.name.interleaved ) ) PARSE_ATTRIBUTE(descriptor.inputs.name.interleaved, usage)\ - else for ( auto& attribute : descriptor.inputs.name.attributes ) PARSE_ATTRIBUTE(attribute.buffer, usage)\ + if ( mesh.isInterleaved( mesh.name.interleaved ) ) PARSE_ATTRIBUTE(descriptor.inputs.name.interleaved, usage | baseUsage)\ + else for ( auto& attribute : descriptor.inputs.name.attributes ) PARSE_ATTRIBUTE(attribute.buffer, usage | baseUsage)\ } PARSE_INPUT(vertex, uf::renderer::enums::Buffer::VERTEX) @@ -914,7 +1038,7 @@ bool ext::vulkan::Graphic::updateMesh( uf::Mesh& mesh ) { return rebuild; } bool ext::vulkan::Graphic::hasPipeline( const GraphicDescriptor& descriptor ) const { - return pipelines.count( descriptor.hash() ) > 0; + return pipelines.count( descriptor ) > 0; } ext::vulkan::Pipeline& ext::vulkan::Graphic::getPipeline() { return getPipeline( descriptor ); @@ -924,11 +1048,11 @@ const ext::vulkan::Pipeline& ext::vulkan::Graphic::getPipeline() const { } ext::vulkan::Pipeline& ext::vulkan::Graphic::getPipeline( const GraphicDescriptor& descriptor ) { if ( !hasPipeline(descriptor) ) return initializePipeline( descriptor ); - return pipelines[descriptor.hash()]; + return pipelines[descriptor]; } const ext::vulkan::Pipeline& ext::vulkan::Graphic::getPipeline( const GraphicDescriptor& descriptor ) const { if ( !hasPipeline(descriptor) ) UF_EXCEPTION("does not have pipeline"); - return pipelines.at(descriptor.hash()); + return pipelines.at(descriptor); } void ext::vulkan::Graphic::updatePipelines() { for ( auto pair : this->pipelines ) pair.second.update( *this ); @@ -939,13 +1063,13 @@ void ext::vulkan::Graphic::record( VkCommandBuffer commandBuffer, size_t pass, s void ext::vulkan::Graphic::record( VkCommandBuffer commandBuffer, const GraphicDescriptor& descriptor, size_t pass, size_t draw ) const { if ( !process ) return; if ( !this->hasPipeline( descriptor ) ) { - VK_DEBUG_VALIDATION_MESSAGE(this << ": has no valid pipeline"); + UF_MSG_ERROR(this << ": has no valid pipeline (" << descriptor.renderMode << " " << descriptor.renderTarget << ")"); return; } auto& pipeline = this->getPipeline( descriptor ); if ( pipeline.descriptorSet == VK_NULL_HANDLE ) { - VK_DEBUG_VALIDATION_MESSAGE(this << ": has no valid pipeline descriptor set"); + UF_MSG_ERROR(this << ": has no valid pipeline descriptor set (" << descriptor.renderMode << " " << descriptor.renderTarget << ")"); return; } if ( !pipeline.metadata.process ) return; @@ -984,26 +1108,6 @@ void ext::vulkan::Graphic::record( VkCommandBuffer commandBuffer, const GraphicD auto& attribute = descriptor.inputs.indirect.attributes.front(); indirect.buffer = buffers.at((0 <= descriptor.inputs.indirect.interleaved ? descriptor.inputs.indirect.interleaved : attribute.buffer) + descriptor.inputs.bufferOffset).buffer; indirect.offset = 0 <= descriptor.inputs.indirect.interleaved ? descriptor.inputs.indirect.offset : attribute.offset; - - /* - .indices = indices.size(), - .instances = 1, - .indexID = mesh.index.count, - .vertexID = mesh.vertex.count, - - .instanceID = mesh.instance.count, - .materialID = p.material, - .objectID = 0, - .vertices = vertices.size(), - - if ( attribute.descriptor.pointer ) { - pod::DrawCommand* drawCommands = (pod::DrawCommand*) attribute.descriptor.pointer; - for ( auto i = 0; i < descriptor.inputs.indirect.count; ++i ) { - auto& drawCommand = drawCommands[i]; - // UF_MSG_DEBUG( "DrawCommand[" << i << "]: " << drawCommand.indices << " " << drawCommand.instances << " " << drawCommand.indexID << " " << drawCommand.vertexID << " " << drawCommand.instanceID << " " << drawCommand.materialID << " " << drawCommand.objectID << " " << drawCommand.vertices ); - } - } - */ } for ( auto& buffer : buffers ) { @@ -1085,7 +1189,7 @@ void ext::vulkan::Graphic::destroy() { pipelines.clear(); material.destroy(); ext::vulkan::Buffers::destroy(); - ext::vulkan::states::rebuild = true; +// ext::vulkan::states::rebuild = true; } #include @@ -1109,7 +1213,28 @@ void ext::vulkan::GraphicDescriptor::parse( ext::json::Value& metadata ) { } } ext::vulkan::GraphicDescriptor::hash_t ext::vulkan::GraphicDescriptor::hash() const { - size_t hash{}; + size_t seed{}; +#if 0 + for ( auto i = 0; i < inputs.vertex.attributes.size(); ++i ) { + uf::hash( inputs.vertex.attributes[i].descriptor.format ); + uf::hash( inputs.vertex.attributes[i].descriptor.offset ); + } + for ( auto i = 0; i < inputs.index.attributes.size(); ++i ) { + uf::hash( inputs.index.attributes[i].descriptor.format ); + uf::hash( inputs.index.attributes[i].descriptor.offset ); + } + for ( auto i = 0; i < inputs.instance.attributes.size(); ++i ) { + uf::hash( inputs.instance.attributes[i].descriptor.format ); + uf::hash( inputs.instance.attributes[i].descriptor.offset ); + } + for ( auto i = 0; i < inputs.indirect.attributes.size(); ++i ) { + uf::hash( inputs.indirect.attributes[i].descriptor.format ); + uf::hash( inputs.indirect.attributes[i].descriptor.offset ); + } +#endif + uf::hash( seed, subpass, renderMode, renderTarget, pipeline, topology, cullMode, fill, lineWidth, frontFace, depth.test, depth.write, depth.operation, depth.bias.enable, depth.bias.constant, depth.bias.slope, depth.bias.clamp ); + return seed; +#if 0 hash += std::hash{}(subpass); if ( settings::invariant::individualPipelines ) @@ -1151,6 +1276,7 @@ ext::vulkan::GraphicDescriptor::hash_t ext::vulkan::GraphicDescriptor::hash() co hash += std::hash{}(depth.bias.clamp); return hash; +#endif } #endif \ No newline at end of file diff --git a/engine/src/ext/vulkan/rendermode.cpp b/engine/src/ext/vulkan/rendermode.cpp index d65ab9d0..eda98348 100644 --- a/engine/src/ext/vulkan/rendermode.cpp +++ b/engine/src/ext/vulkan/rendermode.cpp @@ -77,7 +77,7 @@ uf::Image ext::vulkan::RenderMode::screenshot( size_t attachmentID, size_t layer VK_CHECK_RESULT(vmaCreateImage(allocator, &imageCreateInfo, &allocationCreateInfo, &temporary, &allocation, &allocationInfo)); VkDeviceMemory temporaryMemory = allocationInfo.deviceMemory; - VkCommandBuffer copyCmd = device->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + VkCommandBuffer copyCmd = device->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, Device::QueueEnum::GRAPHICS); VkImageMemoryBarrier imageMemoryBarrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // ext::vulkan::device.queueFamilyIndices.graphics; //VK_QUEUE_FAMILY_IGNORED @@ -162,7 +162,7 @@ uf::Image ext::vulkan::RenderMode::screenshot( size_t attachmentID, size_t layer imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; imageMemoryBarrier.newLayout = attachment.descriptor.layout; vkCmdPipelineBarrier(copyCmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, NULL, 0, NULL, 1, &imageMemoryBarrier ); - device->flushCommandBuffer(copyCmd, true); + device->flushCommandBuffer(copyCmd, Device::QueueEnum::GRAPHICS); const uint8_t* data; vmaMapMemory( allocator, allocation, (void**)&data ); @@ -177,6 +177,8 @@ ext::vulkan::GraphicDescriptor ext::vulkan::RenderMode::bindGraphicDescriptor( c // descriptor.renderMode = this->getName(); descriptor.subpass = pass; descriptor.pipeline = metadata.pipeline; + descriptor.inputs.width = this->width ? this->width : settings::width; + descriptor.inputs.height = this->height ? this->height : settings::height; descriptor.parse( metadata.json["descriptor"] ); return descriptor; } @@ -224,7 +226,7 @@ void ext::vulkan::RenderMode::lockMutex( std::thread::id id ) { this->commands.lockMutex( id ); } bool ext::vulkan::RenderMode::tryMutex( std::thread::id id ) { - this->commands.tryMutex( id ); + return this->commands.tryMutex( id ); } void ext::vulkan::RenderMode::unlockMutex( std::thread::id id ) { this->commands.unlockMutex( id ); @@ -232,6 +234,16 @@ void ext::vulkan::RenderMode::unlockMutex( std::thread::id id ) { std::lock_guard ext::vulkan::RenderMode::guardMutex( std::thread::id id ) { return this->commands.guardMutex( id ); } +void ext::vulkan::RenderMode::cleanupCommands( std::thread::id id ) { + auto& container = this->commands.container(); + for ( auto& pair : container ) { + if ( pair.first == id ) continue; + if ( pair.second.empty() ) continue; + vkFreeCommandBuffers( *device, device->getCommandPool(this->getType() == "Compute" ? Device::QueueEnum::COMPUTE : Device::QueueEnum::GRAPHICS, pair.first), static_cast(pair.second.size()), pair.second.data()); + pair.second.clear(); + } + this->commands.cleanup( id ); +} void ext::vulkan::RenderMode::createCommandBuffers( const uf::stl::vector& graphics ) { } diff --git a/engine/src/ext/vulkan/rendermodes/deferred.cpp b/engine/src/ext/vulkan/rendermodes/deferred.cpp index 7b0db63e..2b12afe7 100644 --- a/engine/src/ext/vulkan/rendermodes/deferred.cpp +++ b/engine/src/ext/vulkan/rendermodes/deferred.cpp @@ -565,7 +565,7 @@ void ext::vulkan::DeferredRenderMode::createCommandBuffers( const uf::stl::vecto for ( auto _ : layers ) { RenderTargetRenderMode* layer = (RenderTargetRenderMode*) _; auto& blitter = layer->blitter; - if ( !blitter.initialized || !blitter.process || blitter.descriptor.subpass != currentPass ) continue; + if ( !blitter.initialized || !blitter.process || blitter.descriptor.subpass != currentPass || blitter.accelerationStructures.top.handle ) continue; ext::vulkan::GraphicDescriptor descriptor = bindGraphicDescriptor(blitter.descriptor, currentSubpass); blitter.record(commands[i], descriptor); } @@ -581,7 +581,7 @@ void ext::vulkan::DeferredRenderMode::createCommandBuffers( const uf::stl::vecto for ( auto _ : layers ) { RenderTargetRenderMode* layer = (RenderTargetRenderMode*) _; auto& blitter = layer->blitter; - if ( !blitter.initialized || !blitter.process || blitter.descriptor.subpass != currentPass ) continue; + if ( !blitter.initialized || !blitter.process || blitter.descriptor.subpass != currentPass || blitter.accelerationStructures.top.handle ) continue; ext::vulkan::GraphicDescriptor descriptor = bindGraphicDescriptor(blitter.descriptor, currentSubpass); blitter.record(commands[i], descriptor, eye, currentDraw++); } diff --git a/engine/src/ext/vulkan/rendermodes/raytrace.cpp b/engine/src/ext/vulkan/rendermodes/raytrace.cpp new file mode 100644 index 00000000..72f962d2 --- /dev/null +++ b/engine/src/ext/vulkan/rendermodes/raytrace.cpp @@ -0,0 +1,522 @@ +#if UF_USE_VULKAN + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + // + struct UniformDescriptor { + struct Matrices { + alignas(16) pod::Matrix4f view; + alignas(16) pod::Matrix4f projection; + alignas(16) pod::Matrix4f iView; + alignas(16) pod::Matrix4f iProjection; + alignas(16) pod::Matrix4f iProjectionView; + alignas(16) pod::Vector4f eyePos; + } matrices[2]; + }; + + void initializeGraphic( uf::renderer::RayTraceRenderMode& renderMode ) { + auto& device = *renderMode.device; + auto& blitter = *renderMode.getBlitter(); + + // setup buffers + uf::Mesh mesh; + mesh.bind(); + mesh.insertVertices({ + { { -0.5f, -0.5f, 0.0f } }, + { { 0.5f, -0.5f, 0.0f } }, + { { 0.0f, 0.5f, 0.0f } }, + }); + mesh.insertIndices({ + 0, 1, 2 + }); + mesh.updateDescriptor(); + + blitter.initialize( renderMode.getName() ); + blitter.initializeMesh( mesh ); + blitter.process = false; + + { + auto& scene = uf::scene::getCurrentScene(); + auto& controller = scene.getController(); + auto& camera = controller.getComponent(); + + UniformDescriptor uniforms; + for ( auto i = 0; i < 2; ++i ) { + uniforms.matrices[i] = UniformDescriptor::Matrices{ + .view = camera.getView(i), + .projection = camera.getProjection(i), + .iView = uf::matrix::inverse( camera.getView(i) ), + .iProjection = uf::matrix::inverse( camera.getProjection(i) ), + .iProjectionView = uf::matrix::inverse( camera.getProjection(i) * camera.getView(i) ), + .eyePos = camera.getEye( i ), + }; + } + blitter.initializeBuffer( (const void*) &uniforms, sizeof(UniformDescriptor), uf::renderer::enums::Buffer::UNIFORM ); + } + + auto& image = blitter.material.textures.emplace_back(); + image.fromBuffers( NULL, 0, uf::renderer::enums::Format::R8G8B8A8_UNORM, uf::renderer::settings::width, uf::renderer::settings::height, 1, 1, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT, VK_IMAGE_LAYOUT_GENERAL ); + + pod::Matrix4f transformMatrix = uf::matrix::identity(); + auto vertexBufferIndex = blitter.initializeBuffer( (const void*) mesh.vertex.attributes.front().pointer, mesh.vertex.count * mesh.vertex.size, uf::renderer::enums::Buffer::ADDRESS | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR ); + auto indexBufferIndex = blitter.initializeBuffer( (const void*) mesh.index.attributes.front().pointer, mesh.index.count * mesh.index.size, uf::renderer::enums::Buffer::ADDRESS | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR ); + + blitter.requestedAlignment = 16; + auto transformBufferIndex = blitter.initializeBuffer( &transformMatrix[0], sizeof(transformMatrix), uf::renderer::enums::Buffer::ADDRESS | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR ); + blitter.requestedAlignment = 0; + + uf::renderer::Buffer vertexBuffer; + uf::renderer::Buffer indexBuffer; + uf::renderer::Buffer transformBuffer; + uf::renderer::Buffer blasBuffer; + uf::renderer::Buffer instancesBuffer; + uf::renderer::Buffer tlasBuffer; + uf::renderer::Buffer scratchBuffer; + + // build SBT + { + uf::stl::string rayGenShaderFilename = uf::io::root+"/shaders/raytrace/shader.gen.spv"; + uf::stl::string rayMissShaderFilename = uf::io::root+"/shaders/raytrace/shader.miss.spv"; + uf::stl::string rayHitShaderFilename = uf::io::root+"/shaders/raytrace/shader.hit.spv"; + blitter.material.initializeShaders({ + {uf::io::resolveURI(rayGenShaderFilename), VK_SHADER_STAGE_RAYGEN_BIT_KHR}, + {uf::io::resolveURI(rayMissShaderFilename), VK_SHADER_STAGE_MISS_BIT_KHR}, + {uf::io::resolveURI(rayHitShaderFilename), VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR}, + }); + } + + blitter.material.getShader("ray:gen").textures.emplace_back().aliasTexture( image ); + + VkPhysicalDeviceAccelerationStructurePropertiesKHR acclerationStructureProperties{}; + VkPhysicalDeviceRayTracingPipelinePropertiesKHR rayTracingPipelineProperties{}; + VkPhysicalDeviceProperties2 deviceProperties2{}; + + { + acclerationStructureProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR; + + rayTracingPipelineProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR; + rayTracingPipelineProperties.pNext = &acclerationStructureProperties; + + deviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + deviceProperties2.pNext = &rayTracingPipelineProperties; + + vkGetPhysicalDeviceProperties2(device.physicalDevice, &deviceProperties2); + } + + // build blas + { + // get address + VkDeviceOrHostAddressConstKHR vertexBufferDeviceAddress{}; + VkDeviceOrHostAddressConstKHR indexBufferDeviceAddress{}; + VkDeviceOrHostAddressConstKHR transformBufferDeviceAddress{}; + + vertexBuffer.aliasBuffer( blitter.buffers[vertexBufferIndex] ); + indexBuffer.aliasBuffer( blitter.buffers[indexBufferIndex] ); + transformBuffer.aliasBuffer( blitter.buffers[transformBufferIndex] ); + + UF_ASSERT( vertexBuffer.buffer ); + UF_ASSERT( indexBuffer.buffer ); + UF_ASSERT( transformBuffer.buffer ); + + vertexBufferDeviceAddress.deviceAddress = vertexBuffer.getAddress(); + indexBufferDeviceAddress.deviceAddress = indexBuffer.getAddress(); + transformBufferDeviceAddress.deviceAddress = transformBuffer.getAddress(); + + // attribute info + uf::Mesh::Attribute vertexAttribute; + for ( auto& attribute : mesh.vertex.attributes ) if ( attribute.descriptor.name == "position" ) vertexAttribute = attribute; + + UF_ASSERT( vertexAttribute.descriptor.name == "position" ); + + // blas info + VkAccelerationStructureGeometryKHR accelerationStructureGeometry{}; + accelerationStructureGeometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + accelerationStructureGeometry.flags = VK_GEOMETRY_OPAQUE_BIT_KHR; + accelerationStructureGeometry.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR; + accelerationStructureGeometry.geometry.triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR; + + accelerationStructureGeometry.geometry.triangles.vertexFormat = vertexAttribute.descriptor.format; + accelerationStructureGeometry.geometry.triangles.vertexData = vertexBufferDeviceAddress; + accelerationStructureGeometry.geometry.triangles.maxVertex = mesh.vertex.count; + accelerationStructureGeometry.geometry.triangles.vertexStride = vertexAttribute.stride; + + accelerationStructureGeometry.geometry.triangles.indexType = VK_INDEX_TYPE_UINT16; + accelerationStructureGeometry.geometry.triangles.indexData = indexBufferDeviceAddress; + + accelerationStructureGeometry.geometry.triangles.transformData.deviceAddress = 0; + accelerationStructureGeometry.geometry.triangles.transformData.hostAddress = nullptr; + accelerationStructureGeometry.geometry.triangles.transformData = transformBufferDeviceAddress; + + // size info + VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfo{}; + accelerationStructureBuildGeometryInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + accelerationStructureBuildGeometryInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + accelerationStructureBuildGeometryInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + accelerationStructureBuildGeometryInfo.geometryCount = 1; + accelerationStructureBuildGeometryInfo.pGeometries = &accelerationStructureGeometry; + + const uint32_t numTriangles = 1; // (mesh.index.count ? mesh.index.count : mesh.vertex.count) / 3; + VkAccelerationStructureBuildSizesInfoKHR accelerationStructureBuildSizesInfo{}; + accelerationStructureBuildSizesInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + + UF_MSG_DEBUG("vkGetAccelerationStructureBuildSizesKHR"); + uf::renderer::vkGetAccelerationStructureBuildSizesKHR( + device, + VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, + &accelerationStructureBuildGeometryInfo, + &numTriangles, + &accelerationStructureBuildSizesInfo + ); + + // build blas buffer + size_t blasBufferIndex = blitter.initializeBuffer( NULL, accelerationStructureBuildSizesInfo.accelerationStructureSize, uf::renderer::enums::Buffer::ACCELERATION_STRUCTURE | uf::renderer::enums::Buffer::ADDRESS ); + blitter.accelerationStructures.bottom.buffer.aliasBuffer( blitter.buffers[blasBufferIndex] ); + + // build blas handle + VkAccelerationStructureCreateInfoKHR accelerationStructureCreateInfo{}; + accelerationStructureCreateInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR; + accelerationStructureCreateInfo.buffer = blitter.accelerationStructures.bottom.buffer.buffer; + accelerationStructureCreateInfo.size = accelerationStructureBuildSizesInfo.accelerationStructureSize; + accelerationStructureCreateInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + + UF_MSG_DEBUG("vkCreateAccelerationStructureKHR"); + VK_CHECK_RESULT(uf::renderer::vkCreateAccelerationStructureKHR(device, &accelerationStructureCreateInfo, nullptr, &blitter.accelerationStructures.bottom.handle)); + + // build scratch buffer + scratchBuffer.alignment = acclerationStructureProperties.minAccelerationStructureScratchOffsetAlignment; + scratchBuffer.initialize( NULL, accelerationStructureBuildSizesInfo.buildScratchSize, uf::renderer::enums::Buffer::STORAGE | uf::renderer::enums::Buffer::ADDRESS ); + + VkAccelerationStructureBuildGeometryInfoKHR accelerationBuildGeometryInfo{}; + accelerationBuildGeometryInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + accelerationBuildGeometryInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + accelerationBuildGeometryInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + accelerationBuildGeometryInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + accelerationBuildGeometryInfo.dstAccelerationStructure = blitter.accelerationStructures.bottom.handle; + accelerationBuildGeometryInfo.geometryCount = 1; // numTriangles; + accelerationBuildGeometryInfo.pGeometries = &accelerationStructureGeometry; + accelerationBuildGeometryInfo.scratchData.deviceAddress = scratchBuffer.getAddress(); + + VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfo{}; + accelerationStructureBuildRangeInfo.primitiveCount = numTriangles; + accelerationStructureBuildRangeInfo.primitiveOffset = 0; + accelerationStructureBuildRangeInfo.firstVertex = 0; + accelerationStructureBuildRangeInfo.transformOffset = 0; + std::vector accelerationBuildStructureRangeInfos = { &accelerationStructureBuildRangeInfo }; + + // build blas + VkCommandBuffer commandBuffer = device.createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, uf::renderer::Device::QueueEnum::COMPUTE); + + UF_MSG_DEBUG("vkCmdBuildAccelerationStructuresKHR"); + uf::renderer::vkCmdBuildAccelerationStructuresKHR( + commandBuffer, + 1, + &accelerationBuildGeometryInfo, + accelerationBuildStructureRangeInfos.data() + ); + device.flushCommandBuffer(commandBuffer, uf::renderer::Device::QueueEnum::COMPUTE); + + VkAccelerationStructureDeviceAddressInfoKHR accelerationDeviceAddressInfo{}; + accelerationDeviceAddressInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR; + accelerationDeviceAddressInfo.accelerationStructure = blitter.accelerationStructures.bottom.handle; + blitter.accelerationStructures.bottom.deviceAddress = uf::renderer::vkGetAccelerationStructureDeviceAddressKHR(device, &accelerationDeviceAddressInfo); + + scratchBuffer.destroy(); + } + // build tlas + { + // setup instances buffer + VkAccelerationStructureInstanceKHR instance{}; + instance.transform = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f + }; + instance.instanceCustomIndex = 0; + instance.mask = 0xFF; + instance.instanceShaderBindingTableRecordOffset = 0; + instance.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; + instance.accelerationStructureReference = blitter.accelerationStructures.bottom.deviceAddress; + + auto instancesBufferIndex = blitter.initializeBuffer( NULL, sizeof(instance), uf::renderer::enums::Buffer::ADDRESS | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR ); + instancesBuffer.aliasBuffer( blitter.buffers[instancesBufferIndex] ); + blitter.updateBuffer( (const void*) &instance, sizeof(instance), instancesBuffer ); + + VkDeviceOrHostAddressConstKHR instanceDataDeviceAddress{}; + instanceDataDeviceAddress.deviceAddress = instancesBuffer.getAddress(); + + VkAccelerationStructureGeometryKHR accelerationStructureGeometry{}; + accelerationStructureGeometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + accelerationStructureGeometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR; + accelerationStructureGeometry.flags = VK_GEOMETRY_OPAQUE_BIT_KHR; + accelerationStructureGeometry.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR; + accelerationStructureGeometry.geometry.instances.arrayOfPointers = VK_FALSE; + accelerationStructureGeometry.geometry.instances.data = instanceDataDeviceAddress; + + VkAccelerationStructureBuildGeometryInfoKHR accelerationStructureBuildGeometryInfo{}; + accelerationStructureBuildGeometryInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + accelerationStructureBuildGeometryInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR; + accelerationStructureBuildGeometryInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + accelerationStructureBuildGeometryInfo.geometryCount = 1; + accelerationStructureBuildGeometryInfo.pGeometries = &accelerationStructureGeometry; + + const uint32_t primitive_count = 1; + VkAccelerationStructureBuildSizesInfoKHR accelerationStructureBuildSizesInfo{}; + accelerationStructureBuildSizesInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + UF_MSG_DEBUG("vkGetAccelerationStructureBuildSizesKHR"); + uf::renderer::vkGetAccelerationStructureBuildSizesKHR( + device, + VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, + &accelerationStructureBuildGeometryInfo, + &primitive_count, + &accelerationStructureBuildSizesInfo + ); + + // build tlas buffer + size_t tlasBufferIndex = blitter.initializeBuffer( NULL, accelerationStructureBuildSizesInfo.accelerationStructureSize, uf::renderer::enums::Buffer::ACCELERATION_STRUCTURE | uf::renderer::enums::Buffer::ADDRESS ); + blitter.accelerationStructures.top.buffer.aliasBuffer( blitter.buffers[tlasBufferIndex] ); + blitter.material.getShader("ray:gen").buffers.emplace_back().aliasBuffer( blitter.buffers[tlasBufferIndex] ); + + // build tlas handle + VkAccelerationStructureCreateInfoKHR accelerationStructureCreateInfo{}; + accelerationStructureCreateInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR; + accelerationStructureCreateInfo.buffer = blitter.accelerationStructures.top.buffer.buffer; + accelerationStructureCreateInfo.size = accelerationStructureBuildSizesInfo.accelerationStructureSize; + accelerationStructureCreateInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR; + + UF_MSG_DEBUG("vkCreateAccelerationStructureKHR"); + VK_CHECK_RESULT(uf::renderer::vkCreateAccelerationStructureKHR(device, &accelerationStructureCreateInfo, nullptr, &blitter.accelerationStructures.top.handle)); + + // build scratch buffer + scratchBuffer.alignment = acclerationStructureProperties.minAccelerationStructureScratchOffsetAlignment; + scratchBuffer.initialize( NULL, accelerationStructureBuildSizesInfo.buildScratchSize, uf::renderer::enums::Buffer::STORAGE | uf::renderer::enums::Buffer::ADDRESS ); + UF_MSG_DEBUG(accelerationStructureBuildSizesInfo.buildScratchSize); + + VkAccelerationStructureBuildGeometryInfoKHR accelerationBuildGeometryInfo{}; + accelerationBuildGeometryInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + accelerationBuildGeometryInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR; + accelerationBuildGeometryInfo.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + accelerationBuildGeometryInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + accelerationBuildGeometryInfo.dstAccelerationStructure = blitter.accelerationStructures.top.handle; + accelerationBuildGeometryInfo.geometryCount = 1; + accelerationBuildGeometryInfo.pGeometries = &accelerationStructureGeometry; + accelerationBuildGeometryInfo.scratchData.deviceAddress = scratchBuffer.getAddress(); + + VkAccelerationStructureBuildRangeInfoKHR accelerationStructureBuildRangeInfo{}; + accelerationStructureBuildRangeInfo.primitiveCount = 1; + accelerationStructureBuildRangeInfo.primitiveOffset = 0; + accelerationStructureBuildRangeInfo.firstVertex = 0; + accelerationStructureBuildRangeInfo.transformOffset = 0; + std::vector accelerationBuildStructureRangeInfos = { &accelerationStructureBuildRangeInfo }; + + VkCommandBuffer commandBuffer = device.createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, uf::renderer::Device::QueueEnum::COMPUTE); + UF_MSG_DEBUG("vkCmdBuildAccelerationStructuresKHR"); + uf::renderer::vkCmdBuildAccelerationStructuresKHR( + commandBuffer, + 1, + &accelerationBuildGeometryInfo, + accelerationBuildStructureRangeInfos.data() + ); + device.flushCommandBuffer(commandBuffer, uf::renderer::Device::QueueEnum::COMPUTE); + + VkAccelerationStructureDeviceAddressInfoKHR accelerationDeviceAddressInfo{}; + accelerationDeviceAddressInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR; + accelerationDeviceAddressInfo.accelerationStructure = blitter.accelerationStructures.top.handle; + blitter.accelerationStructures.top.deviceAddress = uf::renderer::vkGetAccelerationStructureDeviceAddressKHR(device, &accelerationDeviceAddressInfo); + + scratchBuffer.destroy(); + } + blitter.descriptor.inputs.width = image.width; + blitter.descriptor.inputs.height = image.height; + + blitter.initializePipeline(); + + blitter.initialized = true; + blitter.process = true; + UF_MSG_DEBUG("Initialized ray tracer"); + } +} + +const uf::stl::string ext::vulkan::RayTraceRenderMode::getTarget() const { +// auto& metadata = *const_cast(&this->metadata); +// return metadata["target"].as(); + return metadata.target; +} +void ext::vulkan::RayTraceRenderMode::setTarget( const uf::stl::string& target ) { +// this->metadata["target"] = target; + metadata.target = target; +} + +const uf::stl::string ext::vulkan::RayTraceRenderMode::getType() const { + return "Compute"; +} +const size_t ext::vulkan::RayTraceRenderMode::blitters() const { + return 1; +} +ext::vulkan::Graphic* ext::vulkan::RayTraceRenderMode::getBlitter( size_t i ) { + return &this->blitter; +} +uf::stl::vector ext::vulkan::RayTraceRenderMode::getBlitters() { + return { &this->blitter }; +} + +ext::vulkan::GraphicDescriptor ext::vulkan::RayTraceRenderMode::bindGraphicDescriptor( const ext::vulkan::GraphicDescriptor& reference, size_t pass ) { + ext::vulkan::GraphicDescriptor descriptor = ext::vulkan::RenderMode::bindGraphicDescriptor(reference, pass); + descriptor.parse(metadata.json["descriptor"]); + + // invalidate + if ( metadata.target != "" && descriptor.renderMode != this->getName() && descriptor.renderMode != metadata.target ) { + descriptor.invalidated = true; + } else { + descriptor.renderMode = this->getName(); + } + return descriptor; +} + +void ext::vulkan::RayTraceRenderMode::initialize( Device& device ) { + ext::vulkan::RenderMode::initialize( device ); + + blitter.process = false; + initializeGraphic(*this); +} + +void ext::vulkan::RayTraceRenderMode::tick() { + ext::vulkan::RenderMode::tick(); + + bool resized = this->width == 0 && this->height == 0 && ext::vulkan::states::resized; + bool rebuild = resized || ext::vulkan::states::rebuild || this->rebuild; + + if ( resized ) { + auto& image = blitter.material.textures.front(); + + image.destroy(); + image.fromBuffers( NULL, 0, uf::renderer::enums::Format::R8G8B8A8_UNORM, uf::renderer::settings::width, uf::renderer::settings::height, 1, 1, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT, VK_IMAGE_LAYOUT_GENERAL ); + + auto& shader = blitter.material.getShader("ray:gen"); + shader.textures.clear(); + shader.textures.emplace_back().aliasTexture( image ); + + blitter.descriptor.inputs.width = image.width; + blitter.descriptor.inputs.height = image.height; + + blitter.getPipeline().update( blitter ); + } + + { + auto& scene = uf::scene::getCurrentScene(); + auto& controller = scene.getController(); + auto& camera = controller.getComponent(); + + UniformDescriptor uniforms; + for ( auto i = 0; i < 2; ++i ) { + uniforms.matrices[i] = UniformDescriptor::Matrices{ + .view = camera.getView(i), + .projection = camera.getProjection(i), + .iView = uf::matrix::inverse( camera.getView(i) ), + .iProjection = uf::matrix::inverse( camera.getProjection(i) ), + .iProjectionView = uf::matrix::inverse( camera.getProjection(i) * camera.getView(i) ), + .eyePos = camera.getEye( i ), + }; + } + + for ( auto& buffer : blitter.buffers ) { + if ( !(buffer.usage & uf::renderer::enums::Buffer::UNIFORM) ) continue; + if ( buffer.allocationInfo.size != sizeof(UniformDescriptor) ) continue; + + buffer.update( (const void*) &uniforms, sizeof(UniformDescriptor) ); + break; + } + + } + + if ( metadata.limiter.frequency > 0 ) { + if ( metadata.limiter.timer > metadata.limiter.frequency ) { + metadata.limiter.timer = 0; + metadata.limiter.execute = true; + } else { + metadata.limiter.timer = metadata.limiter.timer + uf::physics::time::delta; + metadata.limiter.execute = false; + } + } + +// if ( !graphic.initialized ) initializeGraphic(*this); +} +void ext::vulkan::RayTraceRenderMode::destroy() { + auto& image = blitter.material.textures.front(); + image.screenshot().save("./data/rt.png"); + image.destroy(); + + blitter.destroy(); + + ext::vulkan::RenderMode::destroy(); +} + +void ext::vulkan::RayTraceRenderMode::render() { + if ( commandBufferCallbacks.count(EXECUTE_BEGIN) > 0 ) commandBufferCallbacks[EXECUTE_BEGIN]( VkCommandBuffer{} ); + + //lockMutex( this->mostRecentCommandPoolId ); + auto& commands = getCommands( this->mostRecentCommandPoolId ); + + // Submit commands + // Use a fence to ensure that command buffer has finished executing before using it again + VK_CHECK_RESULT(vkWaitForFences( *device, 1, &fences[states::currentBuffer], VK_TRUE, UINT64_MAX )); + VK_CHECK_RESULT(vkResetFences( *device, 1, &fences[states::currentBuffer] )); + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.pWaitDstStageMask = NULL; // Pointer to the list of pipeline stages that the semaphore waits will occur at + submitInfo.pWaitSemaphores = NULL; // Semaphore(s) to wait upon before the submitted command buffer starts executing + submitInfo.waitSemaphoreCount = 0; // One wait semaphore + submitInfo.pSignalSemaphores = NULL; // Semaphore(s) to be signaled when command buffers have completed + submitInfo.signalSemaphoreCount = 0; // One signal semaphore + submitInfo.pCommandBuffers = &commands[states::currentBuffer]; // Command buffers(s) to execute in this batch (submission) + submitInfo.commandBufferCount = 1; + + VK_CHECK_RESULT(vkQueueSubmit(device->getQueue( uf::renderer::Device::QueueEnum::COMPUTE ), 1, &submitInfo, fences[states::currentBuffer])); + + if ( commandBufferCallbacks.count(EXECUTE_END) > 0 ) commandBufferCallbacks[EXECUTE_END]( VkCommandBuffer{} ); + + this->executed = true; + //unlockMutex( this->mostRecentCommandPoolId ); +} +void ext::vulkan::RayTraceRenderMode::pipelineBarrier( VkCommandBuffer commandBuffer, uint8_t state ) { +} +void ext::vulkan::RayTraceRenderMode::createCommandBuffers( const uf::stl::vector& graphics ) { + // destroy if exists + float width = this->width > 0 ? this->width : ext::vulkan::settings::width; + float height = this->height > 0 ? this->height : ext::vulkan::settings::height; + + VkCommandBufferBeginInfo cmdBufInfo = {}; + cmdBufInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cmdBufInfo.pNext = nullptr; + auto& commands = getCommands(); + for (size_t i = 0; i < commands.size(); ++i) { + VK_CHECK_RESULT(vkBeginCommandBuffer(commands[i], &cmdBufInfo)); + + // pre-renderpass commands + if ( commandBufferCallbacks.count(CALLBACK_BEGIN) > 0 ) commandBufferCallbacks[CALLBACK_BEGIN]( commands[i] ); + + if ( blitter.process ) { + auto& image = blitter.material.textures.front(); + auto descriptor = blitter.descriptor; + descriptor.inputs.width = image.width; + descriptor.inputs.height = image.height; + blitter.getPipeline().record( blitter, descriptor, commands[i] ); + } + + // post-renderpass commands + if ( commandBufferCallbacks.count(CALLBACK_END) > 0 ) commandBufferCallbacks[CALLBACK_END]( commands[i] ); + + VK_CHECK_RESULT(vkEndCommandBuffer(commands[i])); + } +} + +#endif \ No newline at end of file diff --git a/engine/src/ext/vulkan/rendertarget.cpp b/engine/src/ext/vulkan/rendertarget.cpp index 2fc7f5d6..7dc2176f 100644 --- a/engine/src/ext/vulkan/rendertarget.cpp +++ b/engine/src/ext/vulkan/rendertarget.cpp @@ -60,7 +60,7 @@ size_t ext::vulkan::RenderTarget::attach( const Attachment::Descriptor& descript } } if ( !supported ) { - VK_VALIDATION_MESSAGE("Transient attachment requested yet not supported, disabling..."); + // VK_VALIDATION_MESSAGE("Transient attachment requested yet not supported, disabling..."); attachment->descriptor.usage &= ~VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT; } } diff --git a/engine/src/ext/vulkan/shader.cpp b/engine/src/ext/vulkan/shader.cpp index 9c7a4b03..4c68e7b6 100644 --- a/engine/src/ext/vulkan/shader.cpp +++ b/engine/src/ext/vulkan/shader.cpp @@ -166,8 +166,7 @@ ext::vulkan::userdata_t ext::vulkan::jsonToUserdata( const ext::json::Value& pay auto get = input.as(); memcpy( byteBuffer, &get, size ); byteBuffer += size; - } - else if ( primitive == "uint32_t" ) { + } else if ( primitive == "uint32_t" ) { size_t size = sizeof(uint32_t); // v["size"].as(); if ( byteBufferEnd < byteBuffer + size ) return false; // overflow auto get = input.as(); @@ -530,6 +529,22 @@ void ext::vulkan::Shader::initialize( ext::vulkan::Device& device, const uf::stl binding }; } break; + case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { + // generate definition to JSON + #if UF_SHADER_PARSE_AS_JSON + { + metadata.json["definitions"]["accelerationStructure"][name]["name"] = name; + metadata.json["definitions"]["accelerationStructure"][name]["index"] = index; + metadata.json["definitions"]["accelerationStructure"][name]["binding"] = binding; + metadata.json["definitions"]["accelerationStructure"][name]["members"] = parseMembers(resource.type_id); + } + #endif + metadata.definitions.accelerationStructure[name] = Shader::Metadata::Definition::AccelerationStructure{ + name, + index, + binding + }; + } break; } descriptorSetLayoutBindings.push_back( ext::vulkan::initializers::descriptorSetLayoutBinding( descriptorType, stage, binding, arraySize ) ); }; @@ -548,6 +563,7 @@ void ext::vulkan::Shader::initialize( ext::vulkan::Device& device, const uf::stl LOOP_RESOURCES( subpass_inputs, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ); LOOP_RESOURCES( uniform_buffers, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ); LOOP_RESOURCES( storage_buffers, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ); + LOOP_RESOURCES( acceleration_structures, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR ); #undef LOOP_RESOURCES { diff --git a/engine/src/ext/vulkan/texture.cpp b/engine/src/ext/vulkan/texture.cpp index 5e1357a3..9a867ad2 100644 --- a/engine/src/ext/vulkan/texture.cpp +++ b/engine/src/ext/vulkan/texture.cpp @@ -528,7 +528,7 @@ void ext::vulkan::Texture::fromBuffers( subresourceRange.levelCount = this->mips; subresourceRange.layerCount = this->layers; - VkCommandBuffer commandBuffer = device.createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + VkCommandBuffer commandBuffer = device.createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, Device::QueueEnum::GRAPHICS); setImageLayout( commandBuffer, image, @@ -536,7 +536,7 @@ void ext::vulkan::Texture::fromBuffers( imageLayout, subresourceRange ); - device.flushCommandBuffer(commandBuffer); + device.flushCommandBuffer(commandBuffer, Device::QueueEnum::GRAPHICS); this->imageLayout = imageLayout; } @@ -575,7 +575,7 @@ void ext::vulkan::Texture::asRenderTarget( Device& device, uint32_t width, uint3 VK_CHECK_RESULT(vmaCreateImage(allocator, &imageCreateInfo, &allocInfo, &image, &allocation, &allocationInfo)); deviceMemory = allocationInfo.deviceMemory; - VkCommandBuffer layoutCmd = device.createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + VkCommandBuffer layoutCmd = device.createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, Device::QueueEnum::GRAPHICS); imageLayout = VK_IMAGE_LAYOUT_GENERAL; setImageLayout( @@ -587,7 +587,7 @@ void ext::vulkan::Texture::asRenderTarget( Device& device, uint32_t width, uint3 this->mips ); - device.flushCommandBuffer(layoutCmd, true); + device.flushCommandBuffer(layoutCmd, Device::QueueEnum::GRAPHICS); // Create sampler // sampler.initialize( device ); @@ -707,7 +707,7 @@ void ext::vulkan::Texture::update( void* data, VkDeviceSize bufferSize, VkImageL )); // Use a separate command buffer for texture loading - VkCommandBuffer commandBuffer = device.createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + VkCommandBuffer commandBuffer = device.createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, Device::QueueEnum::GRAPHICS); // Image barrier for optimal image (target) // Optimal image will be used as destination for the copy @@ -736,7 +736,7 @@ void ext::vulkan::Texture::update( void* data, VkDeviceSize bufferSize, VkImageL subresourceRange ); - device.flushCommandBuffer(commandBuffer); + device.flushCommandBuffer(commandBuffer, Device::QueueEnum::GRAPHICS); // Clean up staging resources staging.destroy(); @@ -862,7 +862,7 @@ uf::Image ext::vulkan::Texture2D::screenshot( uint32_t layerID ) { VK_CHECK_RESULT(vmaCreateImage(allocator, &imageCreateInfo, &allocationCreateInfo, &temporary, &allocation, &allocationInfo)); VkDeviceMemory temporaryMemory = allocationInfo.deviceMemory; - VkCommandBuffer copyCmd = device->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + VkCommandBuffer copyCmd = device->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, Device::QueueEnum::GRAPHICS); VkImageMemoryBarrier imageMemoryBarrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // ext::vulkan::device.queueFamilyIndices.graphics; //VK_QUEUE_FAMILY_IGNORED @@ -929,7 +929,7 @@ uf::Image ext::vulkan::Texture2D::screenshot( uint32_t layerID ) { imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; imageMemoryBarrier.newLayout = descriptor.imageLayout; vkCmdPipelineBarrier(copyCmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, NULL, 0, NULL, 1, &imageMemoryBarrier ); - device->flushCommandBuffer(copyCmd, true); + device->flushCommandBuffer(copyCmd, Device::QueueEnum::GRAPHICS); const uint8_t* data; vmaMapMemory( allocator, allocation, (void**)&data ); @@ -970,7 +970,7 @@ uf::Image ext::vulkan::Texture3D::screenshot( uint32_t layerID ) { VK_CHECK_RESULT(vmaCreateImage(allocator, &imageCreateInfo, &allocationCreateInfo, &temporary, &allocation, &allocationInfo)); VkDeviceMemory temporaryMemory = allocationInfo.deviceMemory; - VkCommandBuffer copyCmd = device->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + VkCommandBuffer copyCmd = device->createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, Device::QueueEnum::GRAPHICS); VkImageMemoryBarrier imageMemoryBarrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }; imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // ext::vulkan::device.queueFamilyIndices.graphics; //VK_QUEUE_FAMILY_IGNORED @@ -1042,7 +1042,7 @@ uf::Image ext::vulkan::Texture3D::screenshot( uint32_t layerID ) { imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; imageMemoryBarrier.newLayout = descriptor.imageLayout; vkCmdPipelineBarrier(copyCmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, NULL, 0, NULL, 1, &imageMemoryBarrier ); - device->flushCommandBuffer(copyCmd, true); + device->flushCommandBuffer(copyCmd, Device::QueueEnum::GRAPHICS); const uint8_t* data; vmaMapMemory( allocator, allocation, (void**)&data ); diff --git a/engine/src/ext/vulkan/vulkan.cpp b/engine/src/ext/vulkan/vulkan.cpp index 63ed7b6d..22075819 100644 --- a/engine/src/ext/vulkan/vulkan.cpp +++ b/engine/src/ext/vulkan/vulkan.cpp @@ -14,7 +14,8 @@ #include namespace { - uf::stl::vector auxFences; + uf::stl::vector auxFencesGraphics; + uf::stl::vector auxFencesCompute; } uint32_t ext::vulkan::settings::width = 1280; @@ -36,6 +37,7 @@ VkFilter ext::vulkan::settings::swapchainUpscaleFilter = VK_FILTER_LINEAR; bool ext::vulkan::settings::experimental::dedicatedThread = false; bool ext::vulkan::settings::experimental::rebuildOnTickBegin = false; bool ext::vulkan::settings::experimental::batchQueueSubmissions = false; +bool ext::vulkan::settings::experimental::enableMultiGPU = false; // not so experimental bool ext::vulkan::settings::invariant::waitOnRenderEnd = false; @@ -76,6 +78,17 @@ uf::stl::vector ext::vulkan::renderModes = { }; uf::stl::unordered_map ext::vulkan::renderModesMap; +PFN_vkGetBufferDeviceAddressKHR ext::vulkan::vkGetBufferDeviceAddressKHR = NULL; // = reinterpret_cast(vkGetDeviceProcAddr(device, "vkGetBufferDeviceAddressKHR")); +PFN_vkCmdBuildAccelerationStructuresKHR ext::vulkan::vkCmdBuildAccelerationStructuresKHR = NULL; // = reinterpret_cast(vkGetDeviceProcAddr(device, "vkCmdBuildAccelerationStructuresKHR")); +PFN_vkBuildAccelerationStructuresKHR ext::vulkan::vkBuildAccelerationStructuresKHR = NULL; // = reinterpret_cast(vkGetDeviceProcAddr(device, "vkBuildAccelerationStructuresKHR")); +PFN_vkCreateAccelerationStructureKHR ext::vulkan::vkCreateAccelerationStructureKHR = NULL; // = reinterpret_cast(vkGetDeviceProcAddr(device, "vkCreateAccelerationStructureKHR")); +PFN_vkDestroyAccelerationStructureKHR ext::vulkan::vkDestroyAccelerationStructureKHR = NULL; // = reinterpret_cast(vkGetDeviceProcAddr(device, "vkDestroyAccelerationStructureKHR")); +PFN_vkGetAccelerationStructureBuildSizesKHR ext::vulkan::vkGetAccelerationStructureBuildSizesKHR = NULL; // = reinterpret_cast(vkGetDeviceProcAddr(device, "vkGetAccelerationStructureBuildSizesKHR")); +PFN_vkGetAccelerationStructureDeviceAddressKHR ext::vulkan::vkGetAccelerationStructureDeviceAddressKHR = NULL; // = reinterpret_cast(vkGetDeviceProcAddr(device, "vkGetAccelerationStructureDeviceAddressKHR")); +PFN_vkCmdTraceRaysKHR ext::vulkan::vkCmdTraceRaysKHR = NULL; // = reinterpret_cast(vkGetDeviceProcAddr(device, "vkCmdTraceRaysKHR")); +PFN_vkGetRayTracingShaderGroupHandlesKHR ext::vulkan::vkGetRayTracingShaderGroupHandlesKHR = NULL; // = reinterpret_cast(vkGetDeviceProcAddr(device, "vkGetRayTracingShaderGroupHandlesKHR")); +PFN_vkCreateRayTracingPipelinesKHR ext::vulkan::vkCreateRayTracingPipelinesKHR = NULL; // = reinterpret_cast(vkGetDeviceProcAddr(device, "vkCreateRayTracingPipelinesKHR")); + VkResult ext::vulkan::CreateDebugUtilsMessengerEXT(VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pDebugMessenger) { auto func = (PFN_vkCreateDebugUtilsMessengerEXT) vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT"); if ( func == nullptr ) return VK_ERROR_EXTENSION_NOT_PRESENT; @@ -297,13 +310,15 @@ void ext::vulkan::initialize() { } { - ::auxFences.resize( swapchain.buffers ); + ::auxFencesGraphics.resize( swapchain.buffers ); + ::auxFencesCompute.resize( swapchain.buffers ); VkFenceCreateInfo fenceCreateInfo = {}; fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; fenceCreateInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; - for ( auto& fence : ::auxFences ) VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &fence)); + for ( auto& fence : ::auxFencesGraphics ) VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &fence)); + for ( auto& fence : ::auxFencesCompute ) VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &fence)); } uf::graph::initialize(); @@ -313,10 +328,10 @@ void ext::vulkan::initialize() { renderMode->initialize(device); } - auto tasks = uf::thread::schedule(settings::experimental::dedicatedThread ? "Aux" : "Main"); + auto tasks = uf::thread::schedule( settings::invariant::multithreadedRecording ); for ( auto& renderMode : renderModes ) { if ( !renderMode ) continue; tasks.queue([&]{ - auto guard = renderMode->guardMutex(); + // auto guard = renderMode->guardMutex(); // renderMode->lockMutex(); if ( settings::invariant::individualPipelines ) renderMode->bindPipelines(); renderMode->createCommandBuffers(); @@ -336,7 +351,7 @@ void ext::vulkan::tick() { { auto& scene = uf::scene::getCurrentScene(); auto& graph = scene.getGraph(); - auto tasks = uf::thread::schedule(settings::experimental::dedicatedThread ? "Aux" : "Main"); + auto tasks = uf::thread::schedule(settings::experimental::dedicatedThread); for ( auto entity : graph ) { if ( !entity->hasComponent() ) continue; ext::vulkan::Graphic& graphic = entity->getComponent(); @@ -363,7 +378,7 @@ void ext::vulkan::tick() { #endif #if 0 { - auto tasks = uf::thread::schedule(settings::experimental::dedicatedThread ? "Aux" : "Main"); + auto tasks = uf::thread::schedule(settings::experimental::dedicatedThread); for ( auto& renderMode : renderModes ) { if ( !renderMode ) continue; if ( !renderMode->device ) { @@ -393,10 +408,10 @@ void ext::vulkan::tick() { } #endif { - auto tasks = uf::thread::schedule(settings::experimental::dedicatedThread ? "Aux" : "Main"); + auto tasks = uf::thread::schedule( settings::invariant::multithreadedRecording ); for ( auto& renderMode : renderModes ) { if ( !renderMode ) continue; if ( ext::vulkan::states::rebuild || renderMode->rebuild ) tasks.queue([&]{ - auto guard = renderMode->guardMutex(); + // auto guard = renderMode->guardMutex(); // renderMode->lockMutex(); if ( settings::invariant::individualPipelines ) renderMode->bindPipelines(); renderMode->createCommandBuffers(); @@ -419,7 +434,7 @@ void ext::vulkan::render() { for ( auto renderMode : renderModes ) { if ( !renderMode || !renderMode->execute || !renderMode->metadata.limiter.execute ) continue; - renderMode->lockMutex( renderMode->mostRecentCommandPoolId ); + // renderMode->lockMutex( renderMode->mostRecentCommandPoolId ); if ( renderMode->commandBufferCallbacks.count(RenderMode::EXECUTE_BEGIN) > 0 ) renderMode->commandBufferCallbacks[RenderMode::EXECUTE_BEGIN]( VkCommandBuffer{} ); if ( renderMode->getName() == "Gui" || renderMode->getName() == "" || renderMode->getName() == "Swapchain" ) @@ -431,7 +446,8 @@ void ext::vulkan::render() { // stuff we can batch { // Get next image in the swap chain (back/front buffer) - uf::stl::vector submits; submits.reserve( auxRenderModes.size() ); + uf::stl::vector submitsGraphics; submitsGraphics.reserve( auxRenderModes.size() ); + uf::stl::vector submitsCompute; submitsCompute.reserve( auxRenderModes.size() ); for ( auto renderMode : auxRenderModes ) { auto submitInfo = renderMode->queue(); if ( submitInfo.sType != VK_STRUCTURE_TYPE_SUBMIT_INFO ) continue; @@ -440,15 +456,24 @@ void ext::vulkan::render() { uf::graph::render(); uf::scene::render(); - submits.emplace_back(submitInfo); + if ( renderMode->getType() == "Compute" ) { + submitsCompute.emplace_back(submitInfo); + } else { + submitsGraphics.emplace_back(submitInfo); + } renderMode->executed = true; ext::vulkan::setCurrentRenderMode(NULL); } - if ( !submits.empty() ) { - VK_CHECK_RESULT(vkWaitForFences(device, 1, &::auxFences[states::currentBuffer], VK_TRUE, UINT64_MAX)); - VK_CHECK_RESULT(vkResetFences(device, 1, &::auxFences[states::currentBuffer])); - VK_CHECK_RESULT(vkQueueSubmit(device.getQueue( Device::QueueEnum::GRAPHICS ), submits.size(), submits.data(), ::auxFences[states::currentBuffer])); + if ( !submitsCompute.empty() ) { + VK_CHECK_RESULT(vkWaitForFences(device, 1, &::auxFencesCompute[states::currentBuffer], VK_TRUE, UINT64_MAX)); + VK_CHECK_RESULT(vkResetFences(device, 1, &::auxFencesCompute[states::currentBuffer])); + VK_CHECK_RESULT(vkQueueSubmit(device.getQueue( Device::QueueEnum::COMPUTE ), submitsCompute.size(), submitsCompute.data(), ::auxFencesCompute[states::currentBuffer])); + } + if ( !submitsGraphics.empty() ) { + VK_CHECK_RESULT(vkWaitForFences(device, 1, &::auxFencesGraphics[states::currentBuffer], VK_TRUE, UINT64_MAX)); + VK_CHECK_RESULT(vkResetFences(device, 1, &::auxFencesGraphics[states::currentBuffer])); + VK_CHECK_RESULT(vkQueueSubmit(device.getQueue( Device::QueueEnum::GRAPHICS ), submitsGraphics.size(), submitsGraphics.data(), ::auxFencesGraphics[states::currentBuffer])); } } // stuff we can't batch @@ -464,14 +489,15 @@ void ext::vulkan::render() { for ( auto renderMode : renderModes ) { if ( renderMode->commandBufferCallbacks.count(RenderMode::EXECUTE_END) > 0 ) renderMode->commandBufferCallbacks[RenderMode::EXECUTE_END]( VkCommandBuffer{} ); - renderMode->unlockMutex( renderMode->mostRecentCommandPoolId ); + // renderMode->cleanupCommands( renderMode->mostRecentCommandPoolId ); + // renderMode->unlockMutex( renderMode->mostRecentCommandPoolId ); } } else { for ( auto& renderMode : renderModes ) { if ( !renderMode || !renderMode->execute || !renderMode->metadata.limiter.execute ) continue; // renderMode->lockMutex( renderMode->mostRecentCommandPoolId ); - auto guard = renderMode->guardMutex( renderMode->mostRecentCommandPoolId ); + // auto guard = renderMode->guardMutex( renderMode->mostRecentCommandPoolId ); ext::vulkan::setCurrentRenderMode(renderMode); uf::graph::render(); uf::scene::render(); @@ -479,6 +505,9 @@ void ext::vulkan::render() { ext::vulkan::setCurrentRenderMode(NULL); // renderMode->unlockMutex( renderMode->mostRecentCommandPoolId ); } + for ( auto& renderMode : renderModes ) { + // renderMode->cleanupCommands( renderMode->mostRecentCommandPoolId ); + } } @@ -490,7 +519,8 @@ void ext::vulkan::destroy() { ext::vulkan::mutex.lock(); synchronize(); - for ( auto& fence : ::auxFences ) vkDestroyFence( device, fence, nullptr); + for ( auto& fence : ::auxFencesGraphics ) vkDestroyFence( device, fence, nullptr); + for ( auto& fence : ::auxFencesCompute ) vkDestroyFence( device, fence, nullptr); Texture2D::empty.destroy(); Texture3D::empty.destroy(); diff --git a/engine/src/ext/xatlas/xatlas.cpp b/engine/src/ext/xatlas/xatlas.cpp index 1116a500..6f7dde76 100644 --- a/engine/src/ext/xatlas/xatlas.cpp +++ b/engine/src/ext/xatlas/xatlas.cpp @@ -164,9 +164,9 @@ size_t UF_API ext::xatlas::unwrapExperimental( pod::Graph& graph ) { // pack #if UF_XATLAS_UNWRAP_MULTITHREAD - auto tasks = uf::thread::schedule("Async"); + auto tasks = uf::thread::schedule(true); #else - auto tasks = uf::thread::schedule("Main"); + auto tasks = uf::thread::schedule(false); #endif for ( auto& pair : atlases ) { tasks.queue([&]{ @@ -518,9 +518,9 @@ size_t UF_API ext::xatlas::unwrapLazy( pod::Graph& graph ) { // pack #if UF_XATLAS_UNWRAP_MULTITHREAD - auto tasks = uf::thread::schedule("Async"); + auto tasks = uf::thread::schedule(true); #else - auto tasks = uf::thread::schedule("Main"); + auto tasks = uf::thread::schedule(false); #endif for ( auto& pair : atlases ) { tasks.queue([&]{ diff --git a/engine/src/utils/io/iostream.cpp b/engine/src/utils/io/iostream.cpp index f9b567c2..96a212f7 100644 --- a/engine/src/utils/io/iostream.cpp +++ b/engine/src/utils/io/iostream.cpp @@ -52,11 +52,7 @@ namespace { for ( auto x : str.getString() ) addCh(x); } }; -#ifndef UF_USE_NCURSES - bool uf::IoStream::ncurses = false; -#else - bool uf::IoStream::ncurses = true; -#endif +bool uf::IoStream::ncurses = false; uf::IoStream uf::iostream; UF_API_CALL uf::IoStream::IoStream() { @@ -66,58 +62,11 @@ uf::IoStream::~IoStream() { } void UF_API_CALL uf::IoStream::initialize() { -#if UF_USE_NCURSES - if ( !uf::IoStream::ncurses ) return; - ext::ncurses.initialize(); - if ( ext::ncurses.hasColors() ) { - ext::ncurses.startColor(); - - this->m_registeredColors = { - {"Red", {1, COLOR_RED, COLOR_BLACK, "COLOR_RED" } }, - {"Green", {2, COLOR_GREEN, COLOR_BLACK, "COLOR_GREEN" } }, - {"Yellow", {3, COLOR_YELLOW, COLOR_BLACK, "COLOR_YELLOW" } }, - {"Blue", {4, COLOR_BLUE, COLOR_BLACK, "COLOR_BLUE" } }, - {"Cyan", {5, COLOR_CYAN, COLOR_BLACK, "COLOR_CYAN" } }, - {"Magenta", {6, COLOR_MAGENTA, COLOR_BLACK, "COLOR_MAGENTA" } }, - {"White", {7, COLOR_WHITE, COLOR_BLACK, "COLOR_WHITE" } }, - }; - for ( auto& pair : this->m_registeredColors ) { - auto& color = pair.second; - ext::ncurses.initPair( color.id, color.foreground, color.background ); - } - this->setColor("White"); - } else { - uf::iostream << "Color not supported!" << "\n"; - this->readChar(); - this->terminate(); - } -#endif } void UF_API_CALL uf::IoStream::terminate() { -#if UF_USE_NCURSES - if (uf::IoStream::ncurses) ext::ncurses.terminate(); -#endif } void UF_API_CALL uf::IoStream::clear(bool all) { -#if UF_USE_NCURSES - if ( !ext::ncurses.initialized() ) this->initialize(); -#endif - if ( !uf::IoStream::ncurses ) { - if ( all ) { - spec::terminal.clear(); - } - } -#if UF_USE_NCURSES - if ( all ) { - ext::ncurses.move(0,0); - ext::ncurses.clear(true); - return; - } - - ext::ncurses.getYX(::info.cursor.row, ::info.cursor.column); - ext::ncurses.move(::info.cursor.row, 0); - ext::ncurses.clear(); -#endif + spec::terminal.clear(); } uf::stl::string uf::IoStream::getBuffer() { return ::info.output.buffer; @@ -126,343 +75,51 @@ uf::stl::vector uf::IoStream::getHistory() { return ::info.output.history; } void UF_API_CALL uf::IoStream::back() { -#if UF_USE_NCURSES - if ( !ext::ncurses.initialized() ) return; - if ( !uf::IoStream::ncurses ) return; -/* - ext::ncurses.getYX(::info.cursor.row, ::info.cursor.column); - struct { - int row = ::info.cursor.row, column = ::info.cursor.column; - } target; - if ( ::info.output.buffer.size() > 0 ) { - ::info.output.buffer = ::info.output.buffer.substr( 0, ::info.output.buffer.size() ); - } else if ( ::info.output.history.size() > 0 ) { - ::info.output.buffer = ::info.output.history.back(); - ::info.output.history.erase( ::info.output.history.end() - 1 ); - ext::ncurses.move(::info.cursor.row, ::info.output.buffer.size(), ::info.cursor.row, ::info.cursor.column); - } -*/ - if ( !::info.output.buffer.empty() ) { - ::info.output.buffer.pop_back(); - ext::ncurses.delChar(); - } else if ( !::info.output.history.empty() ) { - ::info.output.buffer = ::info.output.history.back(); - ::info.output.history.pop_back(); - ext::ncurses.getYX(::info.cursor.row, ::info.cursor.column); - ext::ncurses.move(::info.cursor.row - 1, ::info.output.buffer.size(), ::info.cursor.row, ::info.cursor.column); - } - ext::ncurses.refresh(); -#endif } char UF_API_CALL uf::IoStream::readChar(const bool& loop) { -#if UF_USE_NCURSES - if ( !ext::ncurses.initialized() ) this->initialize(); -#endif - if ( !uf::IoStream::ncurses ) { - auto ch = std::cin.get(); - ::info.input.history.push_back( std::to_string(ch) ); - return ch; - } -#if UF_USE_NCURSES - while ( loop ) { - ::info.character = ext::ncurses.getCh(); - if ( ::info.character == ERR ) continue; - if ( ::info.character > 0 && ::info.character < 128 ) return ::info.character; - } -#endif - return 0; + auto ch = std::cin.get(); + + addCh(ch); +// ::info.input.history.push_back( std::to_string(ch) ); + return ch; } uf::stl::string UF_API_CALL uf::IoStream::readString(const bool& loop) { -#if UF_USE_NCURSES - if ( !ext::ncurses.initialized() ) this->initialize(); - // static uf::stl::vector history; -#endif - if ( !uf::IoStream::ncurses ) { - uf::stl::string in; - std::getline(std::cin, in); - ::info.input.history.push_back( in ); - return in; - } -#if UF_USE_NCURSES - /*uf::stl::string ::info.input.buffer; - int ch; - struct { - int r = 0, c = 0; - int r_max = 0, c_max = 0; - } home; - unsigned int cursor = 0; - - uf::stl::string ::info.temporary; - int ::info.indices.history = -1;*/ - ext::ncurses.getYX(::info.cursor.row, ::info.cursor.column); - ext::ncurses.getMaxYX(::info.window.rows, ::info.window.columns); - ::info.origin = ::info.cursor; + uf::stl::string str; + std::getline(std::cin, str); - // break on new line or carriage return - while ( loop && (::info.character = ext::ncurses.getCh()) != '\n' && ::info.character != '\r' ) { - // err - if ( ::info.character == ERR ) { - continue; - // mouse - } else if ( ::info.character == KEY_MOUSE ) { - /* - MEVENT event; - if ( wgetmouse(&event) == OK ) { - - } - */ - // backspace - } else if ( ::info.character == '\b' && ::info.input.buffer.length() > 0 ) { - ::info.input.buffer = ::info.input.buffer.substr( 0, ::info.input.buffer.length() - 1 ); - ext::ncurses.getYX(::info.cursor.row, ::info.cursor.column); - ext::ncurses.move(::info.cursor.row, ::info.cursor.column-1, ::info.cursor.row, ::info.cursor.column); - ext::ncurses.delChar(); - ext::ncurses.refresh(); - --::info.indices.buffer; - // left/right arrow keys - } else if ( ::info.character == KEY_LEFT || ::info.character == KEY_RIGHT ) { - int dir = (::info.character == KEY_LEFT) ? -1 : 1; - ext::ncurses.getYX(::info.cursor.row, ::info.cursor.column); - if ( ::info.cursor.column < ::info.window.columns && ::info.cursor.column > ::info.origin.column ) { - ::info.cursor.column += dir; - ::info.indices.buffer += dir; - ext::ncurses.move(-1, ::info.cursor.column); - } - // up/down arrow keys - } else if ( ::info.character == KEY_UP || ::info.character == KEY_DOWN ) { - if ( ::info.temporary == "" ) ::info.temporary = ::info.input.buffer; - int dir = (::info.character == KEY_UP) ? -1 : 1; - if ( ::info.indices.history - 1 >= 0 && ::info.indices.history + 1 < ::info.input.history.size() ) { - ext::ncurses.move(::info.origin.row, ::info.origin.column, ::info.cursor.row, ::info.cursor.column); - ext::ncurses.clear(); - ::info.indices.history += dir; - ::info.input.buffer = *(::info.input.history.end() - ::info.indices.history - 1); - ::info.indices.buffer = ::info.input.buffer.size(); - uf::iostream << ::info.input.buffer; - } - /* else if ( delta < 0 ) { - ext::ncurses.move(home.r, home.c); - ext::ncurses.clear(); - ::info.input.buffer = ::info.temporary; - ::info.indices.buffer = ::info.input.buffer.size(); - uf::iostream << ::info.input.buffer; - } - */ - // valid characters - } else if ( ::info.character >= 32 && ::info.character <= 127 ) { - char at = ::info.character; - ::info.temporary = ""; - if ( ::info.indices.buffer == ::info.input.buffer.length() ) { - ::info.input.buffer += this->writeChar(at); - } else { - ::info.input.buffer[::info.indices.buffer] = this->writeChar(at); - } - ++::info.indices.buffer; - } - } - uf::iostream << "\n"; - ::info.input.history.push_back(::info.input.buffer); -#endif - return ::info.input.buffer; + addStr(str); +// ::info.input.history.push_back( str ); + return str; } uf::String UF_API_CALL uf::IoStream::readUString(const bool& loop) { -#if UF_USE_NCURSES - if ( !ext::ncurses.initialized() ) this->initialize(); - // static uf::stl::vector history; -#endif - if ( !uf::IoStream::ncurses ) { - uf::stl::string in; - std::getline(std::cin, in); - ::info.input.history.push_back( in ); - return in; - } -#if UF_USE_NCURSES - /*uf::stl::string ::info.input.buffer; - int ch; - struct { - int r = 0, c = 0; - int r_max = 0, c_max = 0; - } home; - unsigned int cursor = 0; - - uf::stl::string ::info.temporary; - int ::info.indices.history = -1;*/ - ext::ncurses.getYX(::info.cursor.row, ::info.cursor.column); - ext::ncurses.getMaxYX(::info.window.rows, ::info.window.columns); - ::info.origin = ::info.cursor; + uf::stl::string str; + std::getline(std::cin, str); - // break on new line or carriage return - while ( loop && (::info.character = ext::ncurses.getCh()) != '\n' && ::info.character != '\r' ) { - // err - if ( ::info.character == ERR ) { - continue; - // mouse - } else if ( ::info.character == KEY_MOUSE ) { - /* - MEVENT event; - if ( wgetmouse(&event) == OK ) { - - } - */ - // backspace - } else if ( ::info.character == '\b' && ::info.input.buffer.length() > 0 ) { - ::info.input.buffer = ::info.input.buffer.substr( 0, ::info.input.buffer.length() - 1 ); - ext::ncurses.getYX(::info.cursor.row, ::info.cursor.column); - ext::ncurses.move(::info.cursor.row, ::info.cursor.column-1, ::info.cursor.row, ::info.cursor.column); - ext::ncurses.delChar(); - ext::ncurses.refresh(); - --::info.indices.buffer; - // left/right arrow keys - } else if ( ::info.character == KEY_LEFT || ::info.character == KEY_RIGHT ) { - int dir = (::info.character == KEY_LEFT) ? -1 : 1; - ext::ncurses.getYX(::info.cursor.row, ::info.cursor.column); - if ( ::info.cursor.column < ::info.window.columns && ::info.cursor.column > ::info.origin.column ) { - ::info.cursor.column += dir; - ::info.indices.buffer += dir; - ext::ncurses.move(-1, ::info.cursor.column); - } - // up/down arrow keys - } else if ( ::info.character == KEY_UP || ::info.character == KEY_DOWN ) { - if ( ::info.temporary == "" ) ::info.temporary = ::info.input.buffer; - int dir = (::info.character == KEY_UP) ? -1 : 1; - if ( ::info.indices.history - 1 >= 0 && ::info.indices.history + 1 < ::info.input.history.size() ) { - ext::ncurses.move(::info.origin.row, ::info.origin.column, ::info.cursor.row, ::info.cursor.column); - ext::ncurses.clear(); - ::info.indices.history += dir; - ::info.input.buffer = *(::info.input.history.end() - ::info.indices.history - 1); - ::info.indices.buffer = ::info.input.buffer.size(); - uf::iostream << ::info.input.buffer; - } - /* else if ( delta < 0 ) { - ext::ncurses.move(home.r, home.c); - ext::ncurses.clear(); - ::info.input.buffer = ::info.temporary; - ::info.indices.buffer = ::info.input.buffer.size(); - uf::iostream << ::info.input.buffer; - } - */ - // valid characters - // UTF-8 char - } else { - int iterations = 0; - char at = ::info.character; - if ( 32 <= ::info.character && ::info.character <= 127 ) iterations = 1; - if ( 194 <= ::info.character && ::info.character <= 223 ) iterations = 2; - if ( 224 <= ::info.character && ::info.character <= 239 ) iterations = 3; - if ( 240 <= ::info.character && ::info.character <= 244 ) iterations = 4; - for ( int i = 0; i < iterations; ++i, ++::info.indices.buffer ) { - if ( at != ::info.character ) at = ext::ncurses.getCh(); - if ( ::info.indices.buffer == ::info.input.buffer.length() ) ::info.input.buffer += this->writeChar(at); - else ::info.input.buffer[::info.indices.buffer] = this->writeChar(at); - at = 0; - } - } - /* - // ANSI / 1-byte UTF-8 - } else if ( 32 <= ch && ch <= 127 ) { - char at = ch; - if ( cursor == ::info.input.buffer.length() ) { - ::info.input.buffer += this->writeChar(at); - } else { - ::info.input.buffer[cursor] = this->writeChar(at); - } - ++cursor; - // 2-byte UTF-8 - } else if ( 194 <= ch && ch <= 223 ) { - char at = ch; - for ( int i = 0; i < 2; ++i, ++cursor ) { - if ( at != ch ) at = ext::ncurses.getCh(); - if ( cursor == ::info.input.buffer.length() ) ::info.input.buffer += this->writeChar(at); - else ::info.input.buffer[cursor] = this->writeChar(at); - at = 0; - } - } - */ - } - uf::iostream << "\n"; - ::info.input.history.push_back(::info.input.buffer); -#endif - return ::info.input.buffer; + addUStr(str); +// ::info.input.history.push_back( str ); + return str; } char UF_API_CALL uf::IoStream::writeChar( char ch ) { -#if UF_USE_NCURSES - if ( !ext::ncurses.initialized() ) this->initialize(); -#endif addCh(ch); -/* - if ( ch == '\r' ) ch = '\n'; - if ( ch != '\n' ) { - ::info.output.buffer += ch; - } else { - this->writeString("new line: " + ::info.output.buffer + "\n"); - ::info.output.history.push_back(::info.output.buffer); - ::info.output.buffer = ""; - } -*/ - if ( !uf::IoStream::ncurses ) { - if ( ch == '\n' ) std::cout << std::endl; - else std::cout << ch; - ::info.input.history.push_back( std::to_string(ch) ); - return ch; - } -#if UF_USE_NCURSES - ext::ncurses.addChar(ch); - ext::ncurses.refresh(); -#endif + + if ( ch == '\n' ) std::cout << std::endl; + else std::cout << ch; +// ::info.input.history.push_back( std::to_string(ch) ); return ch; } const uf::stl::string& UF_API_CALL uf::IoStream::writeString( const uf::stl::string& str ) { -#if UF_USE_NCURSES - if ( !ext::ncurses.initialized() ) this->initialize(); -#endif addStr(str); -/* - std::size_t needle; - uf::stl::string haystack = str; - while ( (needle = haystack.find('\n')) != uf::stl::string::npos ) { - ::info.output.buffer += haystack.substr( 0, needle ); - ::info.output.history.push_back(::info.output.buffer); - ::info.output.buffer = ""; - haystack = haystack.substr( needle + 1 ); - } - ::info.output.buffer += haystack; -*/ - if ( !uf::IoStream::ncurses ) { - if ( str == "\n" ) std::cout << std::endl; - else std::cout << str; - ::info.input.history.push_back( str ); - return str; - } -#if UF_USE_NCURSES - ext::ncurses.addStr(str.c_str()); - ext::ncurses.refresh(); -#endif + + if ( str == "\n" ) std::cout << std::endl; + else std::cout << str; +// ::info.input.history.push_back( str ); return str; } const uf::String& UF_API_CALL uf::IoStream::writeUString( const uf::String& str ) { -#if UF_USE_NCURSES - if ( !ext::ncurses.initialized() ) this->initialize(); -#endif addUStr(str); -/* - std::size_t needle; - auto haystack = str.getString(); - while ( (needle = haystack.find('\n')) != uf::stl::string::npos ) { - ::info.output.buffer += uf::stl::string((const char*) haystack.substr( 0, needle ).c_str()); - ::info.output.history.push_back(::info.output.buffer); - ::info.output.buffer = ""; - haystack = haystack.substr( needle + 1 ); - } -*/ - if ( !uf::IoStream::ncurses ) { - std::cout << (const char*) str.getString().c_str(); - ::info.input.history.push_back( str ); - return str; - } -#if UF_USE_NCURSES - ext::ncurses.addStr((const char*) str.getString().c_str()); - ext::ncurses.refresh(); -#endif + + std::cout << (const char*) str.getString().c_str(); +// ::info.input.history.push_back( str ); return str; } @@ -669,19 +326,6 @@ uf::stl::string uf::IoStream::getColor() { return this->m_currentColor; } void UF_API_CALL uf::IoStream::setColor( const uf::stl::string& str ) { -#if UF_USE_NCURSES - if ( !uf::IoStream::ncurses ) return; - if ( !ext::ncurses.initialized() ) this->initialize(); - - if ( this->m_registeredColors.count(str) < 1 ) return; - - if ( ::info.color.last != 0 ) ext::ncurses.attrOff(COLOR_PAIR(::info.color.last)); - short id = this->m_registeredColors.at(str).id; - ext::ncurses.attrOn(COLOR_PAIR(id)); - ::info.color.last = id; - - this->m_currentColor = str; -#endif } // manip via stream manipulator diff --git a/engine/src/utils/thread/thread.cpp b/engine/src/utils/thread/thread.cpp index 20153d49..06ae6894 100644 --- a/engine/src/utils/thread/thread.cpp +++ b/engine/src/utils/thread/thread.cpp @@ -8,36 +8,45 @@ float uf::thread::limiter = 1.0f / 120.0f; uint uf::thread::workers = 1; std::thread::id uf::thread::mainThreadId = std::this_thread::get_id(); bool uf::thread::async = false; +uf::stl::string uf::thread::workerThreadName = "Worker"; +uf::stl::string uf::thread::mainThreadName = "Main"; -#define UF_THREAD_ANNOUNCE(x) //UF_MSG_DEBUG(x) +#define UF_THREAD_ANNOUNCE(x) UF_MSG_DEBUG(x) void UF_API uf::thread::start( pod::Thread& thread ) { if ( thread.running ) return; thread.thread = std::thread( uf::thread::tick, std::ref(thread) ); thread.running = true; } -void UF_API uf::thread::quit( pod::Thread& thread ) { // if ( !thread.running ) return; +void UF_API uf::thread::quit( pod::Thread& thread ) { if ( !thread.running ) return; thread.running = false; + thread.conditions.queued.notify_one(); - if ( thread.mutex != NULL ) thread.mutex->lock(); + bool locked = false; +// if ( thread.mutex != NULL ) locked = thread.mutex->try_lock(); if ( thread.thread.joinable() ) thread.thread.join(); - if ( thread.mutex != NULL ) thread.mutex->unlock(); +// if ( thread.mutex != NULL ) thread.mutex->unlock(); } void UF_API uf::thread::tick( pod::Thread& thread ) { #if !UF_ENV_DREAMCAST bool res = SetThreadAffinityMask(GetCurrentThread(), (1u << thread.affinity)); - if ( !res ) UF_THREAD_ANNOUNCE("Failed to set affinity of Thread #" << thread.uid << " (" << thread.name << " on ID " << pthread_self() << "/" << thread.affinity << ")"); + if ( !res ) UF_THREAD_ANNOUNCE("Failed to set affinity of Thread #" << thread.uid << " (" << thread.name << " on core " << pthread_self() << "/" << thread.affinity << ")"); #endif - UF_THREAD_ANNOUNCE("Starting Thread #" << thread.uid << " (" << thread.name << " on ID " << thread.affinity << ") (Limiter: " << (1.0f / thread.limiter) << " FPS)"); + UF_THREAD_ANNOUNCE("Starting Thread #" << thread.uid << " (" << thread.name << " on core " << thread.affinity << ")" << (thread.limiter ? " (Limiter: " + std::to_string(1.0f / thread.limiter) + " FPS)" : "")); thread.timer.start(); while ( thread.running ) { + std::unique_lock lock(*thread.mutex); + thread.conditions.queued.wait(lock, [&]{ + return !thread.queue.empty() || !thread.running; + }); + uf::thread::process( thread ); - if ( thread.terminates && thread.queue.empty() && thread.container.empty() ) uf::thread::quit( thread ); if ( thread.limiter > 0 ) { long long sleep = (thread.limiter * 1000) - thread.timer.elapsed().asMilliseconds(); if ( sleep > 0 ) { + // UF_THREAD_ANNOUNCE("Thread #" << thread.uid << " (" << thread.name << " on core " << thread.affinity << ") will sleep for " << sleep << "ms"); std::this_thread::sleep_for(std::chrono::milliseconds(sleep)); } thread.timer.reset(); @@ -48,17 +57,15 @@ void UF_API uf::thread::tick( pod::Thread& thread ) { pod::Thread& UF_API uf::thread::fetchWorker( const uf::stl::string& name ) { static int current = 0; static int limit = uf::thread::workers; - static uint threads = std::thread::hardware_concurrency(); - int tries = 8; - while ( --tries >= 0 ) { - if ( ++current >= limit ) current = 0; - uf::stl::string thread = name; - if ( current > 0 ) thread += " " + std::to_string(current); - auto& pod = uf::thread::get(thread); - if ( std::this_thread::get_id() != pod.thread.get_id() ) return pod; - } - - return uf::thread::get("Main"); + + uf::stl::string thread = name + " " + std::to_string(current); + if ( ++current >= limit ) current = 0; + auto& pod = uf::thread::get(thread); + UF_ASSERT( std::this_thread::get_id() != pod.thread.get_id() ); + return pod; +} +pod::Thread::Tasks UF_API uf::thread::schedule( bool async, bool wait ) { + return schedule( async ? uf::thread::workerThreadName : uf::thread::mainThreadName, wait ); } pod::Thread::Tasks UF_API uf::thread::schedule( const uf::stl::string& name, bool wait ) { pod::Thread::Tasks tasks = { @@ -69,16 +76,15 @@ pod::Thread::Tasks UF_API uf::thread::schedule( const uf::stl::string& name, boo return tasks; } void UF_API uf::thread::execute( pod::Thread::Tasks& tasks ) { - if ( tasks.container.empty() ) return; - if ( tasks.name == "Main" ) { - // for ( auto& task : tasks.container ) task(); + if ( tasks.container.empty() ) return; + if ( tasks.name == uf::thread::mainThreadName ) { while ( !tasks.container.empty() ) { auto& task = tasks.container.front(); task(); tasks.container.pop(); } - - } else if ( tasks.name == "Async" ) { +#if 0 + } else /*if ( tasks.name == "Async" )*/ { uf::stl::vector> futures; futures.reserve(tasks.container.size()); // for ( auto& task : tasks.container ) { @@ -88,9 +94,9 @@ void UF_API uf::thread::execute( pod::Thread::Tasks& tasks ) { tasks.container.pop(); } if ( tasks.waits ) for ( auto& future : futures ) future.wait(); +#else } else { uf::stl::vector workers; - // for ( auto& task : tasks.container ) { while ( !tasks.container.empty() ) { auto task = tasks.container.front(); auto& worker = uf::thread::fetchWorker( tasks.name ); @@ -100,6 +106,7 @@ void UF_API uf::thread::execute( pod::Thread::Tasks& tasks ) { } if ( tasks.waits ) for ( auto& worker : workers ) uf::thread::wait( *worker ); } +#endif } /* void UF_API uf::thread::batchWorker( const pod::Thread::function_t& function, const uf::stl::string& name ) { @@ -145,6 +152,7 @@ void UF_API uf::thread::queue( const pod::Thread::function_t& function ) { void UF_API uf::thread::queue( pod::Thread& thread, const pod::Thread::function_t& function ) { if ( thread.mutex != NULL ) thread.mutex->lock(); thread.queue.emplace( function ); + thread.conditions.queued.notify_one(); if ( thread.mutex != NULL ) thread.mutex->unlock(); } void UF_API uf::thread::process( pod::Thread& thread ) { if ( !uf::thread::has(uf::thread::uid(thread)) ) { UF_THREAD_ANNOUNCE("Bad Thread: " << thread.uid << " " << thread.name); return; } //ops @@ -174,12 +182,12 @@ void UF_API uf::thread::process( pod::Thread& thread ) { if ( !uf::thread::has(u } #endif } - thread.condition.notify_one(); + thread.conditions.finished.notify_one(); } void UF_API uf::thread::wait( pod::Thread& thread ) { if ( thread.mutex != NULL ) { std::unique_lock lock(*thread.mutex); - thread.condition.wait(lock, [&]{return thread.queue.empty();}); + thread.conditions.finished.wait(lock, [&]{return thread.queue.empty();}); return; } while ( !thread.queue.empty() ); @@ -202,7 +210,7 @@ void UF_API uf::thread::terminate() { } } pod::Thread& UF_API uf::thread::create( const uf::stl::string& name, bool start, bool locks ) { - if ( name == "Main" ) start = false; + if ( name == uf::thread::mainThreadName ) start = false; pod::Thread* pointer = NULL; uf::thread::threads.emplace_back(pointer = new pod::Thread); @@ -213,14 +221,12 @@ pod::Thread& UF_API uf::thread::create( const uf::stl::string& name, bool start, static uint threads = std::thread::hardware_concurrency(); thread.name = name; thread.uid = uids++; - thread.terminates = false; thread.running = false; - thread.mutex = NULL; thread.mutex = locks ? new std::mutex : NULL; thread.limiter = uf::thread::limiter; thread.affinity = (thread.uid % limit) + 1; - UF_THREAD_ANNOUNCE("Creating Thread #" << thread.uid << " (" << name << ") " << &thread << " (Affinity: " << thread.affinity << ") (Limiter: " << (1.0f / thread.limiter) << " FPS)" << (locks ? " with mutex" : "")); + UF_THREAD_ANNOUNCE("Creating Thread #" << thread.uid << " (" << thread.name << " on core " << thread.affinity << ")" << (thread.limiter ? " (Limiter: " + std::to_string(1.0f / thread.limiter) + " FPS)" : "")); if ( start ) uf::thread::start( thread ); diff --git a/ext/behaviors/baking/behavior.cpp b/ext/behaviors/baking/behavior.cpp index c7926bf9..f3d0edf4 100644 --- a/ext/behaviors/baking/behavior.cpp +++ b/ext/behaviors/baking/behavior.cpp @@ -127,9 +127,9 @@ SAVE: { UF_MSG_DEBUG("Baking..."); #if UF_BAKER_SAVE_MULTITHREAD - auto tasks = uf::thread::schedule("Async"); + auto tasks = uf::thread::schedule(true); #else - auto tasks = uf::thread::schedule("Main"); + auto tasks = uf::thread::schedule(false); #endif // 0 is always broken, do not save it for ( size_t i = 0; i < metadata.max.layers; ++i ) { diff --git a/ext/behaviors/scene/behavior.cpp b/ext/behaviors/scene/behavior.cpp index 3b1bfcf3..faccb891 100644 --- a/ext/behaviors/scene/behavior.cpp +++ b/ext/behaviors/scene/behavior.cpp @@ -195,7 +195,9 @@ void ext::ExtSceneBehavior::initialize( uf::Object& self ) { pixels.insert( pixels.end(), p.begin(), p.end() ); } // texture.mips = 0; - texture.fromBuffers( (void*) pixels.data(), pixels.size(), uf::renderer::enums::Format::R8G8B8A8_UNORM, size.x, size.y, 1, filenames.size() ); + if ( size.x > 0 && size.y > 0 ) { + texture.fromBuffers( (void*) pixels.data(), pixels.size(), uf::renderer::enums::Format::R8G8B8A8_UNORM, size.x, size.y, 1, filenames.size() ); + } } #endif } @@ -633,7 +635,7 @@ void ext::ExtSceneBehavior::Metadata::deserialize( uf::Object& self, uf::Seriali /*this->*/fog.density.multiplier = serializer["light"]["fog"]["density"]["multiplier"].as(); /*this->*/fog.density.scale = serializer["light"]["fog"]["density"]["scale"].as(); - /*this->*/sky.box.filename = serializer["sky"]["box"]["filename"].as(); + /*this->*/sky.box.filename = serializer["sky"]["box"]["filename"].as(sky.box.filename); /*this->*/shader.mode = serializer["system"]["renderer"]["shader"]["mode"].as(); /*this->*/shader.scalar = serializer["system"]["renderer"]["shader"]["scalar"].as(); diff --git a/ext/main.cpp b/ext/main.cpp index 893d2760..3c87483f 100644 --- a/ext/main.cpp +++ b/ext/main.cpp @@ -243,12 +243,12 @@ void EXT_API ext::initialize() { } /* Frame limiter */ { - float limit = configEngineLimitersJson["framerate"].as(); + size_t limit = configEngineLimitersJson["framerate"].as(); ::times.limiter = limit != 0 ? 1.0 / limit : 0; UF_MSG_DEBUG("Limiter set to " << ::times.limiter << "ms"); } /* Max delta time */{ - float limit = configEngineLimitersJson["deltaTime"].as(); + size_t limit = configEngineLimitersJson["deltaTime"].as(); uf::physics::time::clamp = limit != 0 ? 1.0 / limit : 0; } @@ -264,18 +264,12 @@ void EXT_API ext::initialize() { } /* Thread frame limiter */ { - float limit = configEngineThreadJson["frame limiter"].as(); + size_t limit = configEngineThreadJson["frame limiter"].as(); uf::thread::limiter = limit != 0 ? 1.0 / limit : 0; } // Set worker threads - if ( configEngineThreadJson["workers"].as() == "async" ) { - uf::thread::async = true; - auto threads = std::max( 1, (int) std::thread::hardware_concurrency() - 1 ) / 2; - configEngineThreadJson["workers"] = threads; - uf::thread::workers = configEngineThreadJson["workers"].as(); - UF_MSG_DEBUG("Using async worker threads"); - } else if ( configEngineThreadJson["workers"].as() == "auto" ) { + if ( configEngineThreadJson["workers"].as() == "auto" ) { auto threads = std::max( 1, (int) std::thread::hardware_concurrency() - 1 ) / 2; configEngineThreadJson["workers"] = threads; uf::thread::workers = configEngineThreadJson["workers"].as(); @@ -401,11 +395,12 @@ void EXT_API ext::initialize() { "deferred alias output to swapchain": false, */ + #if 1 ::requestDedicatedRenderThread = configRenderExperimentalJson["dedicated thread"].as( uf::renderer::settings::experimental::dedicatedThread ); - #if 0 + #else uf::renderer::settings::experimental::dedicatedThread = configRenderExperimentalJson["dedicated thread"].as( uf::renderer::settings::experimental::dedicatedThread ); - uf::renderer::settings::experimental::rebuildOnTickBegin = configRenderExperimentalJson["rebuild on tick begin"].as( uf::renderer::settings::experimental::rebuildOnTickBegin ); #endif + uf::renderer::settings::experimental::rebuildOnTickBegin = configRenderExperimentalJson["rebuild on tick begin"].as( uf::renderer::settings::experimental::rebuildOnTickBegin ); uf::renderer::settings::experimental::batchQueueSubmissions = configRenderExperimentalJson["batch queue submissions"].as( uf::renderer::settings::experimental::batchQueueSubmissions ); uf::renderer::settings::invariant::multithreadedRecording = configRenderInvariantJson["multithreaded recording"].as( uf::renderer::settings::invariant::multithreadedRecording ); @@ -492,7 +487,12 @@ void EXT_API ext::initialize() { renderMode.metadata.pipelines.emplace_back("culling"); } } + #if UF_USE_VULKAN + if ( ::json["engine"]["render modes"]["raytrace"].as(false) ) { + auto* renderMode = new uf::renderer::RayTraceRenderMode; + uf::renderer::addRenderMode( renderMode, "RayTrace" ); + } /* Callbacks for 2KHR stuffs */ { uf::hooks.addHook("vulkan:Instance.ExtensionsEnabled", []( const ext::json::Value& json ) { // UF_MSG_DEBUG("vulkan:Instance.ExtensionsEnabled: " << json); @@ -556,7 +556,7 @@ void EXT_API ext::initialize() { uf::renderer::initialize(); } - pod::Thread& threadMain = uf::thread::get("Main"); + pod::Thread& threadMain = uf::thread::get(uf::thread::mainThreadName); #if UF_USE_DISCORD /* Discord */ if ( ::config.engine.ext.discord.enabled ) { ext::discord::initialize(); @@ -585,7 +585,7 @@ void EXT_API ext::initialize() { }; if ( json["immediate"].as() ) function(); - else uf::thread::queue( uf::thread::get("Main"), function ); + else uf::thread::queue( uf::thread::get(uf::thread::mainThreadName), function ); }); uf::hooks.addHook( "game:Scene.Cleanup", [&](ext::json::Value& json){ @@ -681,7 +681,7 @@ void EXT_API ext::tick() { } /* Tick Main Thread Queue */ { - uf::thread::process( uf::thread::get("Main") ); + uf::thread::process( uf::thread::get(uf::thread::mainThreadName) ); } #if UF_USE_ULTRALIGHT /* Ultralight-UX */ if ( ::config.engine.ext.ultralight.enabled ) { @@ -733,7 +733,7 @@ void EXT_API ext::tick() { if ( ::requestDedicatedRenderThread ) { ::requestDedicatedRenderThread = false; uf::renderer::settings::experimental::dedicatedThread = true; - uf::renderer::settings::experimental::rebuildOnTickBegin = true; + // uf::renderer::settings::experimental::rebuildOnTickBegin = true; UF_MSG_DEBUG("Dedicated render requested"); } #endif diff --git a/makefiles/win64.gcc.make b/makefiles/win64.gcc.make index 28165b3d..3757331a 100644 --- a/makefiles/win64.gcc.make +++ b/makefiles/win64.gcc.make @@ -2,6 +2,6 @@ ARCH = win64 CDIR = CC = gcc CXX = g++ -OPTIMIZATIONS = -g -O3 -fstrict-aliasing #-flto +OPTIMIZATIONS = -O3 -g -fstrict-aliasing -DUF_NO_EXCEPTIONS #-flto WARNINGS = -Wall -Wno-unknown-pragmas -Wno-unused-function -Wno-unused-variable -Wno-switch -Wno-reorder -Wno-sign-compare -Wno-unused-but-set-variable -Wno-ignored-attributes -Wno-narrowing -Wno-misleading-indentation FLAGS += -std=c++20 $(OPTIMIZATIONS) $(WARNINGS) -fdiagnostics-color=always \ No newline at end of file diff --git a/program.sh b/program.sh index b77526e4..a71b47b3 100644 --- a/program.sh +++ b/program.sh @@ -1,4 +1,12 @@ #!/bin/bash tskill program cd bin -./program.bat $@ || tskill program \ No newline at end of file + +ARCH=$(cat ./exe/default/arch) +CC=$(cat ./exe/default/cc) +RENDERER=$(cat ./exe/default/renderer) + +export PATH="$(pwd)/exe/lib/${ARCH}/:$(pwd)/exe/lib/${ARCH}/${CC}/${RENDERER}/:${PATH}" + +./exe/program.${ARCH}.${CC}.${RENDERER}.exe $@ +tskill program