diff --git a/README.md b/README.md index 9d4913f..f531373 100644 --- a/README.md +++ b/README.md @@ -797,6 +797,10 @@ To build the FSR2 sample, please follow the following instructions: 3) Open the solutions in the DX12 or Vulkan directory (depending on your preference), compile and run. +4) There is an off-screen version which currently runs on Vulkan,to use this,you need to provide: +- Color textures (RGBA) +- Motion vectors and depth textures(RG for motion vectors and B for depth,and please remain the A channel) + # Limitations FSR2 requires a GPU with typed UAV load and R16G16B16A16_UNORM support. diff --git a/src/VK/CMakeLists.txt b/src/VK/CMakeLists.txt index e849d8c..92a778e 100644 --- a/src/VK/CMakeLists.txt +++ b/src/VK/CMakeLists.txt @@ -49,6 +49,11 @@ set(sources AnimatedTexture.h dpiawarescaling.manifest) +set(sources_offscreen + ./offscreen/main.cpp + ./offscreen/helper.hpp + ./offscreen/Vulkan_FSR2.hpp) + set(fsr1_shaders_src ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_core.h ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api/shaders/ffx_common_types.h @@ -124,6 +129,7 @@ set(sample_shaders_src set(APP_ICON_GPUOPEN "${CMAKE_CURRENT_SOURCE_DIR}/../common/GpuOpenIcon.rc") source_group("sources" FILES ${sources}) +source_group("sources_offscreen" FILES ${sources_offscreen}) source_group("spatial_shaders" FILES ${fsr1_shaders_src}) source_group("fsr2_shaders" FILES ${fsr2_shaders_src}) source_group("particle_shaders" FILES ${particle_shaders_src}) @@ -146,4 +152,13 @@ target_link_directories(FSR2_Sample_VK PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../ add_dependencies(FSR2_Sample_VK copied_vk_shaders_fsr2_src) set_target_properties(FSR2_Sample_VK PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_HOME_DIRECTORY}/bin" DEBUG_POSTFIX "d") + +add_executable(FSR2_OffScreen_VK ${sources_offscreen} ${fsr2_src} ${fsr1_shaders_src} ${fsr2_shaders_src} ${particle_shaders_src} ${spd_shaders_src} ${common} ${APP_ICON_GPUOPEN}) +target_compile_definitions(FSR2_OffScreen_VK PRIVATE $<$:FSR2_DEBUG_SHADERS=1>) +target_link_libraries(FSR2_OffScreen_VK LINK_PUBLIC Cauldron_VK Vulkan::Vulkan ffx_fsr2_api_x64 ffx_fsr2_api_vk_x64) # ffx_fsr2_api_x64 +target_include_directories(FSR2_OffScreen_VK PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/offscreen/include ${CMAKE_CURRENT_SOURCE_DIR}/../ffx-fsr2-api ${CMAKE_CURRENT_SOURCE_DIR}/../../libs) +target_link_directories(FSR2_OffScreen_VK PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../libs) + +set_target_properties(FSR2_OffScreen_VK PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_HOME_DIRECTORY}/bin" DEBUG_POSTFIX "d") + set_source_files_properties(${Shaders_src} PROPERTIES VS_TOOL_OVERRIDE "Text") diff --git a/src/VK/offscreen/Vulkan_FSR2.hpp b/src/VK/offscreen/Vulkan_FSR2.hpp new file mode 100644 index 0000000..8fe4861 --- /dev/null +++ b/src/VK/offscreen/Vulkan_FSR2.hpp @@ -0,0 +1,1120 @@ +#pragma once + +#include "helper.hpp" + +#define STB_IMAGE_IMPLEMENTATION +#include + +//fsr2 stuff +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const std::vector validationLayers = { + "VK_LAYER_KHRONOS_validation" +}; + +const std::vector deviceExtensions = { + VK_KHR_SWAPCHAIN_EXTENSION_NAME +}; + +#define NDEBUG + +#ifdef NDEBUG +const bool enableValidationLayers = false; +#else +const bool enableValidationLayers = true; +#endif + +VkResult CreateDebugUtilsMessengerEXT(VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pDebugMessenger) { + auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT"); + if (func != nullptr) { + return func(instance, pCreateInfo, pAllocator, pDebugMessenger); + } + else { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } +} + +void DestroyDebugUtilsMessengerEXT(VkInstance instance, VkDebugUtilsMessengerEXT debugMessenger, const VkAllocationCallbacks* pAllocator) { + auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugUtilsMessengerEXT"); + if (func != nullptr) { + func(instance, debugMessenger, pAllocator); + } +} + +struct QueueFamilyIndices { + uint32_t graphicsFamily; + bool isComplete = false; +}; + +inline VkAccessFlags accessFlagsForImageLayout(VkImageLayout layout) { + switch (layout) { + case VK_IMAGE_LAYOUT_PREINITIALIZED: + return VK_ACCESS_HOST_WRITE_BIT; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + return VK_ACCESS_TRANSFER_WRITE_BIT; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + return VK_ACCESS_TRANSFER_READ_BIT; + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + return VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + return VK_ACCESS_SHADER_READ_BIT; + default: + return VkAccessFlags(); + } +} + +inline VkPipelineStageFlags pipelineStageForLayout(VkImageLayout layout) { + switch (layout) { + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + return VK_PIPELINE_STAGE_TRANSFER_BIT; + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + return VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + return VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + case VK_IMAGE_LAYOUT_PREINITIALIZED: + return VK_PIPELINE_STAGE_HOST_BIT; + case VK_IMAGE_LAYOUT_UNDEFINED: + return VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + default: + return VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } +} + +class Vulkan_FSR2 { +public: + void run(config& config_) { + initVulkan(); + FSR2(config_); + cleanup(); + } + +private: + + VkInstance instance; + VkDebugUtilsMessengerEXT debugMessenger; + + VkPhysicalDevice physicalDevice = VK_NULL_HANDLE; + VkSampleCountFlagBits msaaSamples = VK_SAMPLE_COUNT_1_BIT; + VkDevice device; + + VkQueue graphicsQueue; + + VkCommandPool commandPool; + + //hard-code area + VkImage Fsr2_color_Image; + VkDeviceMemory Fsr2_color_ImageMemory; + VkImageView Fsr2_color_ImageView; + + VkImage Fsr2_motionVectors_Image; + VkDeviceMemory Fsr2_motionVectors_ImageMemory; + VkImageView Fsr2_motionVectors_ImageView; + + VkImage Fsr2_depth_Image; + VkDeviceMemory Fsr2_depth_ImageMemory; + VkImageView Fsr2_depth_ImageView; + + VkImage Fsr2_out_Image; + VkDeviceMemory Fsr2_out_ImageMemory; + VkImageView Fsr2_out_ImageView; + + //------------- + + //fsr2 stuff + FfxFsr2ContextDescription initializationParameters = {}; + FfxFsr2Context context; + double deltaTime; + float scale_motion_x = 1.0f; + float scale_motion_y = 1.0f; + float scale_depth = 1.0f; + bool reset = false; + bool enableJitter = false; + + //----------- + + + void initVulkan() { + createInstance(); + setupDebugMessenger(); + pickPhysicalDevice(); + createLogicalDevice(); + createCommandPool(); + } + + void cleanup() { + + vkDestroyImage(device, Fsr2_color_Image, nullptr); + vkFreeMemory(device, Fsr2_color_ImageMemory, nullptr); + vkDestroyImageView(device, Fsr2_color_ImageView, nullptr); + + vkDestroyImage(device, Fsr2_motionVectors_Image, nullptr); + vkFreeMemory(device, Fsr2_motionVectors_ImageMemory, nullptr); + vkDestroyImageView(device, Fsr2_motionVectors_ImageView, nullptr); + + vkDestroyImage(device, Fsr2_depth_Image, nullptr); + vkFreeMemory(device, Fsr2_depth_ImageMemory, nullptr); + vkDestroyImageView(device, Fsr2_depth_ImageView, nullptr); + + vkDestroyImage(device, Fsr2_out_Image, nullptr); + vkFreeMemory(device, Fsr2_out_ImageMemory, nullptr); + vkDestroyImageView(device, Fsr2_out_ImageView, nullptr); + + ffxFsr2ContextDestroy(&context); + + vkDestroyCommandPool(device, commandPool, nullptr); + + vkDestroyDevice(device, nullptr); + + if (enableValidationLayers) { + DestroyDebugUtilsMessengerEXT(instance, debugMessenger, nullptr); + } + + vkDestroyInstance(instance, nullptr); + } + + void createInstance() { + if (enableValidationLayers && !checkValidationLayerSupport()) { + throw std::runtime_error("validation layers requested, but not available!"); + } + + VkApplicationInfo appInfo{}; + appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + appInfo.pApplicationName = "Hello Triangle"; + appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0); + appInfo.pEngineName = "No Engine"; + appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0); + appInfo.apiVersion = VK_API_VERSION_1_0; + + VkInstanceCreateInfo createInfo{}; + createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + createInfo.pApplicationInfo = &appInfo; + + auto extensions = getRequiredExtensions(); + createInfo.enabledExtensionCount = static_cast(extensions.size()); + createInfo.ppEnabledExtensionNames = extensions.data(); + + VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo{}; + if (enableValidationLayers) { + createInfo.enabledLayerCount = static_cast(validationLayers.size()); + createInfo.ppEnabledLayerNames = validationLayers.data(); + + populateDebugMessengerCreateInfo(debugCreateInfo); + createInfo.pNext = (VkDebugUtilsMessengerCreateInfoEXT*)&debugCreateInfo; + } + else { + createInfo.enabledLayerCount = 0; + + createInfo.pNext = nullptr; + } + + if (vkCreateInstance(&createInfo, nullptr, &instance) != VK_SUCCESS) { + throw std::runtime_error("failed to create instance!"); + } + } + + void populateDebugMessengerCreateInfo(VkDebugUtilsMessengerCreateInfoEXT& createInfo) { + createInfo = {}; + createInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; + createInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + createInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + createInfo.pfnUserCallback = debugCallback; + } + + void setupDebugMessenger() { + if (!enableValidationLayers) return; + + VkDebugUtilsMessengerCreateInfoEXT createInfo; + populateDebugMessengerCreateInfo(createInfo); + + if (CreateDebugUtilsMessengerEXT(instance, &createInfo, nullptr, &debugMessenger) != VK_SUCCESS) { + throw std::runtime_error("failed to set up debug messenger!"); + } + } + + void pickPhysicalDevice() { + uint32_t deviceCount = 0; + vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr); + + if (deviceCount == 0) { + throw std::runtime_error("failed to find GPUs with Vulkan support!"); + } + + std::vector devices(deviceCount); + vkEnumeratePhysicalDevices(instance, &deviceCount, devices.data()); + + for (const auto& device : devices) { + if (isDeviceSuitable(device)) { + physicalDevice = device; + msaaSamples = getMaxUsableSampleCount(); + break; + } + } + + if (physicalDevice == VK_NULL_HANDLE) { + throw std::runtime_error("failed to find a suitable GPU!"); + } + } + + void createLogicalDevice() { + QueueFamilyIndices indices = findQueueFamilies(physicalDevice); + + std::vector queueCreateInfos; + std::set uniqueQueueFamilies = { indices.graphicsFamily }; + + float queuePriority = 1.0f; + for (uint32_t queueFamily : uniqueQueueFamilies) { + VkDeviceQueueCreateInfo queueCreateInfo{}; + queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queueCreateInfo.queueFamilyIndex = queueFamily; + queueCreateInfo.queueCount = 1; + queueCreateInfo.pQueuePriorities = &queuePriority; + queueCreateInfos.push_back(queueCreateInfo); + } + + VkPhysicalDeviceFeatures deviceFeatures{}; + + deviceFeatures.samplerAnisotropy = VK_TRUE; + + VkDeviceCreateInfo createInfo{}; + createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + + createInfo.queueCreateInfoCount = static_cast(queueCreateInfos.size()); + createInfo.pQueueCreateInfos = queueCreateInfos.data(); + + createInfo.pEnabledFeatures = &deviceFeatures; + + createInfo.enabledExtensionCount = static_cast(deviceExtensions.size()); + createInfo.ppEnabledExtensionNames = deviceExtensions.data(); + + if (enableValidationLayers) { + createInfo.enabledLayerCount = static_cast(validationLayers.size()); + createInfo.ppEnabledLayerNames = validationLayers.data(); + } + else { + createInfo.enabledLayerCount = 0; + } + + if (vkCreateDevice(physicalDevice, &createInfo, nullptr, &device) != VK_SUCCESS) { + throw std::runtime_error("failed to create logical device!"); + } + + vkGetDeviceQueue(device, indices.graphicsFamily, 0, &graphicsQueue); + } + + void createCommandPool() { + QueueFamilyIndices queueFamilyIndices = findQueueFamilies(physicalDevice); + + VkCommandPoolCreateInfo poolInfo{}; + poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + poolInfo.queueFamilyIndex = queueFamilyIndices.graphicsFamily; + + if (vkCreateCommandPool(device, &poolInfo, nullptr, &commandPool) != VK_SUCCESS) { + throw std::runtime_error("failed to create graphics command pool!"); + } + } + + VkFormat findSupportedFormat(const std::vector& candidates, VkImageTiling tiling, VkFormatFeatureFlags features) { + for (VkFormat format : candidates) { + VkFormatProperties props; + vkGetPhysicalDeviceFormatProperties(physicalDevice, format, &props); + + if (tiling == VK_IMAGE_TILING_LINEAR && (props.linearTilingFeatures & features) == features) { + return format; + } + else if (tiling == VK_IMAGE_TILING_OPTIMAL && (props.optimalTilingFeatures & features) == features) { + return format; + } + } + + throw std::runtime_error("failed to find supported format!"); + } + + VkFormat findDepthFormat() { + return findSupportedFormat( + { VK_FORMAT_D32_SFLOAT, VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT }, + VK_IMAGE_TILING_OPTIMAL, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT + ); + } + + bool hasStencilComponent(VkFormat format) { + return format == VK_FORMAT_D32_SFLOAT_S8_UINT || format == VK_FORMAT_D24_UNORM_S8_UINT; + } + + VkSampleCountFlagBits getMaxUsableSampleCount() { + VkPhysicalDeviceProperties physicalDeviceProperties; + vkGetPhysicalDeviceProperties(physicalDevice, &physicalDeviceProperties); + + VkSampleCountFlags counts = physicalDeviceProperties.limits.framebufferColorSampleCounts & physicalDeviceProperties.limits.framebufferDepthSampleCounts; + if (counts & VK_SAMPLE_COUNT_64_BIT) { return VK_SAMPLE_COUNT_64_BIT; } + if (counts & VK_SAMPLE_COUNT_32_BIT) { return VK_SAMPLE_COUNT_32_BIT; } + if (counts & VK_SAMPLE_COUNT_16_BIT) { return VK_SAMPLE_COUNT_16_BIT; } + if (counts & VK_SAMPLE_COUNT_8_BIT) { return VK_SAMPLE_COUNT_8_BIT; } + if (counts & VK_SAMPLE_COUNT_4_BIT) { return VK_SAMPLE_COUNT_4_BIT; } + if (counts & VK_SAMPLE_COUNT_2_BIT) { return VK_SAMPLE_COUNT_2_BIT; } + + return VK_SAMPLE_COUNT_1_BIT; + } + + VkImageView createImageView(VkImage image, VkFormat format, VkImageAspectFlags aspectFlags, uint32_t mipLevels) { + VkImageViewCreateInfo viewInfo{}; + viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + viewInfo.image = image; + viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D; + viewInfo.format = format; + viewInfo.subresourceRange.aspectMask = aspectFlags; + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = mipLevels; + viewInfo.subresourceRange.baseArrayLayer = 0; + viewInfo.subresourceRange.layerCount = 1; + + VkImageView imageView; + if (vkCreateImageView(device, &viewInfo, nullptr, &imageView) != VK_SUCCESS) { + throw std::runtime_error("failed to create texture image view!"); + } + + return imageView; + } + + void createImage(uint32_t width, uint32_t height, uint32_t mipLevels, VkSampleCountFlagBits numSamples, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage, VkMemoryPropertyFlags properties, VkImage& image, VkDeviceMemory& imageMemory) { + VkImageCreateInfo imageInfo{}; + imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + imageInfo.imageType = VK_IMAGE_TYPE_2D; + imageInfo.extent.width = width; + imageInfo.extent.height = height; + imageInfo.extent.depth = 1; + imageInfo.mipLevels = mipLevels; + imageInfo.arrayLayers = 1; + imageInfo.format = format; + imageInfo.tiling = tiling; + imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageInfo.usage = usage; + imageInfo.samples = numSamples; + imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + if (vkCreateImage(device, &imageInfo, nullptr, &image) != VK_SUCCESS) { + throw std::runtime_error("failed to create image!"); + } + + VkMemoryRequirements memRequirements; + vkGetImageMemoryRequirements(device, image, &memRequirements); + + VkMemoryAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocInfo.allocationSize = memRequirements.size; + allocInfo.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties); + + if (vkAllocateMemory(device, &allocInfo, nullptr, &imageMemory) != VK_SUCCESS) { + throw std::runtime_error("failed to allocate image memory!"); + } + + vkBindImageMemory(device, image, imageMemory, 0); + } + + void transitionImageLayout(VkImage image, VkFormat format, VkImageLayout oldLayout, VkImageLayout newLayout, uint32_t mipLevels, VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT) { + VkCommandBuffer commandBuffer = beginSingleTimeCommands(); + + VkImageMemoryBarrier barrier{}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = oldLayout; + barrier.newLayout = newLayout; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = aspectMask; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = mipLevels; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + + VkPipelineStageFlags sourceStage; + VkPipelineStageFlags destinationStage; + + if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + } + else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } + else + { + barrier.srcAccessMask = accessFlagsForImageLayout(oldLayout); + barrier.dstAccessMask = accessFlagsForImageLayout(newLayout); + + sourceStage = pipelineStageForLayout(oldLayout); + destinationStage = pipelineStageForLayout(newLayout); + } + /*else { + throw std::invalid_argument("unsupported layout transition!"); + }*/ + + vkCmdPipelineBarrier( + commandBuffer, + sourceStage, destinationStage, + 0, + 0, nullptr, + 0, nullptr, + 1, &barrier + ); + + endSingleTimeCommands(commandBuffer); + } + + void copyBufferToImage(VkBuffer buffer, VkImage image, uint32_t width, uint32_t height, VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT) { + VkCommandBuffer commandBuffer = beginSingleTimeCommands(); + + VkBufferImageCopy region{}; + region.bufferOffset = 0; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = aspectMask; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset = { 0, 0, 0 }; + region.imageExtent = { + width, + height, + 1 + }; + + vkCmdCopyBufferToImage(commandBuffer, buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + + endSingleTimeCommands(commandBuffer); + } + + void createBuffer(VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer& buffer, VkDeviceMemory& bufferMemory) + { + VkBufferCreateInfo bufferInfo{}; + bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufferInfo.size = size; + bufferInfo.usage = usage; + bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) + { + throw std::runtime_error("failed to create buffer!"); + } + + VkMemoryRequirements memRequirements; + vkGetBufferMemoryRequirements(device, buffer, &memRequirements); + + VkMemoryAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocInfo.allocationSize = memRequirements.size; + allocInfo.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties); + + if (vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory) != VK_SUCCESS) { + throw std::runtime_error("failed to allocate buffer memory!"); + } + + vkBindBufferMemory(device, buffer, bufferMemory, 0); + } + + VkCommandBuffer beginSingleTimeCommands() { + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandPool = commandPool; + allocInfo.commandBufferCount = 1; + + VkCommandBuffer commandBuffer; + vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + vkBeginCommandBuffer(commandBuffer, &beginInfo); + + return commandBuffer; + } + + void endSingleTimeCommands(VkCommandBuffer commandBuffer) { + vkEndCommandBuffer(commandBuffer); + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + + vkQueueSubmit(graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE); + vkQueueWaitIdle(graphicsQueue); + + vkFreeCommandBuffers(device, commandPool, 1, &commandBuffer); + } + + void copyBuffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size) { + VkCommandBuffer commandBuffer = beginSingleTimeCommands(); + + VkBufferCopy copyRegion{}; + copyRegion.size = size; + vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, ©Region); + + endSingleTimeCommands(commandBuffer); + } + + uint32_t findMemoryType(uint32_t typeFilter, VkMemoryPropertyFlags properties) { + VkPhysicalDeviceMemoryProperties memProperties; + vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties); + + for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { + if ((typeFilter & (1 << i)) && (memProperties.memoryTypes[i].propertyFlags & properties) == properties) { + return i; + } + } + + throw std::runtime_error("failed to find suitable memory type!"); + } + + VkSurfaceFormatKHR chooseSwapSurfaceFormat(const std::vector& availableFormats) { + for (const auto& availableFormat : availableFormats) { + if (availableFormat.format == VK_FORMAT_B8G8R8A8_SRGB && availableFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { + return availableFormat; + } + } + + return availableFormats[0]; + } + + VkPresentModeKHR chooseSwapPresentMode(const std::vector& availablePresentModes) { + for (const auto& availablePresentMode : availablePresentModes) { + if (availablePresentMode == VK_PRESENT_MODE_MAILBOX_KHR) { + return availablePresentMode; + } + } + + return VK_PRESENT_MODE_FIFO_KHR; + } + + bool isDeviceSuitable(VkPhysicalDevice device) { + QueueFamilyIndices indices = findQueueFamilies(device); + + bool extensionsSupported = checkDeviceExtensionSupport(device); + + VkPhysicalDeviceFeatures supportedFeatures; + vkGetPhysicalDeviceFeatures(device, &supportedFeatures); + + return indices.isComplete && extensionsSupported && supportedFeatures.samplerAnisotropy; + + } + + bool checkDeviceExtensionSupport(VkPhysicalDevice device) { + uint32_t extensionCount; + vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount, nullptr); + + std::vector availableExtensions(extensionCount); + vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount, availableExtensions.data()); + + std::set requiredExtensions(deviceExtensions.begin(), deviceExtensions.end()); + + for (const auto& extension : availableExtensions) { + requiredExtensions.erase(extension.extensionName); + } + + return requiredExtensions.empty(); + } + + QueueFamilyIndices findQueueFamilies(VkPhysicalDevice device) { + QueueFamilyIndices indices; + + uint32_t queueFamilyCount = 0; + vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount, nullptr); + + std::vector queueFamilies(queueFamilyCount); + vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount, queueFamilies.data()); + + int i = 0; + for (const auto& queueFamily : queueFamilies) { + if (queueFamily.queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT)) { + indices.graphicsFamily = i; + indices.isComplete = true; + break; + } + + i++; + } + + return indices; + } + + std::vector getRequiredExtensions() { + + std::vector extensions; + + if (enableValidationLayers) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + + return extensions; + } + + bool checkValidationLayerSupport() { + uint32_t layerCount; + vkEnumerateInstanceLayerProperties(&layerCount, nullptr); + + std::vector availableLayers(layerCount); + vkEnumerateInstanceLayerProperties(&layerCount, availableLayers.data()); + + for (const char* layerName : validationLayers) { + bool layerFound = false; + + for (const auto& layerProperties : availableLayers) { + if (strcmp(layerName, layerProperties.layerName) == 0) { + layerFound = true; + break; + } + } + + if (!layerFound) { + return false; + } + } + + return true; + } + + static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, void* pUserData) { + std::cerr << "validation layer: " << pCallbackData->pMessage << std::endl; + + return VK_FALSE; + } + + //hard-code area + void copyImageToBuffer(VkBuffer buffer, VkImage image, uint32_t width, uint32_t height) { + VkCommandBuffer commandBuffer = beginSingleTimeCommands(); + + VkBufferImageCopy region{}; + region.bufferOffset = 0; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset = { 0, 0, 0 }; + region.imageExtent = { + width, + height, + 1 + }; + + vkCmdCopyImageToBuffer(commandBuffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, 1, ®ion); + + endSingleTimeCommands(commandBuffer); + } + void CreateFSR2Image(std::string input_path, int& texWidth, int& texHeight, int& texChannels, VkImage& Fsr2_input_Image, VkDeviceMemory& Fsr2_input_ImageMemory, VkImageView& Fsr2_input_ImageView) + { + + texWidth = 2; texHeight = 2; + //mipLevels = static_cast(std::floor(std::log2(std::max(texWidth, texHeight)))) + 1; + int mipLevels_fsr = 1; + std::vector data_float = read_image_exr(input_path.c_str(), texWidth, texHeight, texChannels); + VkDeviceSize imageSize = texWidth * texHeight * sizeof(float) * texChannels; + /*std::vector data_float = { + 1.0,0.5,0.5,1.0, + 0.5,1.0,1.0,1.0, + 0.5,0.5,1.0,1.0, + 1.0,1.0,1.0,1.0 + }; + */ + VkBuffer stagingBuffer; + VkDeviceMemory stagingBufferMemory; + createBuffer(imageSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingBufferMemory); + + void* data; + vkMapMemory(device, stagingBufferMemory, 0, imageSize, 0, &data); + memcpy(data, data_float.data(), static_cast(imageSize)); + vkUnmapMemory(device, stagingBufferMemory); + + createImage(texWidth, texHeight, mipLevels_fsr, VK_SAMPLE_COUNT_1_BIT, VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, Fsr2_input_Image, Fsr2_input_ImageMemory); + + transitionImageLayout(Fsr2_input_Image, VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, mipLevels_fsr); + copyBufferToImage(stagingBuffer, Fsr2_input_Image, static_cast(texWidth), static_cast(texHeight)); + //transitioned to VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL while generating mipmaps + + vkDestroyBuffer(device, stagingBuffer, nullptr); + vkFreeMemory(device, stagingBufferMemory, nullptr); + + Fsr2_input_ImageView = createImageView(Fsr2_input_Image, VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT, mipLevels_fsr); + } + + void CreateFSR2Image_MotionDepth(std::string input_path, int& texWidth, int& texHeight, int& texChannels, VkImage& Fsr2_motion, VkDeviceMemory& Fsr2_motionMemory, VkImageView& Fsr2_motion_ImageView, VkImage& Fsr2_depth, VkDeviceMemory& Fsr2_depthMemory, VkImageView& Fsr2_depthMemory_ImageView) + { + + texWidth = 2; texHeight = 2; + //mipLevels = static_cast(std::floor(std::log2(std::max(texWidth, texHeight)))) + 1; + int mipLevels_fsr = 1; + std::vector data_float = read_image_exr(input_path.c_str(), texWidth, texHeight, texChannels); + VkDeviceSize imageSize_motion = texWidth * texHeight * sizeof(float) * 2; + VkDeviceSize imageSize_depth = texWidth * texHeight * sizeof(float); + std::vector data_motion; + std::vector data_depth; + + for (int i = 0; i < texWidth * texHeight; i++) { + data_motion.push_back(scale_motion_x * data_float[texChannels * i + 0]); + data_motion.push_back(scale_motion_y * data_float[texChannels * i + 1]); + data_depth.push_back(1.0f / (scale_depth * data_float[texChannels * i + 2] + 0.00001f)); + } + + VkBuffer stagingBuffer_motion; + VkDeviceMemory stagingBufferMemory_motion; + createBuffer(imageSize_motion, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer_motion, stagingBufferMemory_motion); + + void* dataMotion; + vkMapMemory(device, stagingBufferMemory_motion, 0, imageSize_motion, 0, &dataMotion); + memcpy(dataMotion, data_motion.data(), static_cast(imageSize_motion)); + vkUnmapMemory(device, stagingBufferMemory_motion); + //motion image + createImage(texWidth, texHeight, mipLevels_fsr, VK_SAMPLE_COUNT_1_BIT, VK_FORMAT_R32G32_SFLOAT, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, Fsr2_motion, Fsr2_motionMemory); + + transitionImageLayout(Fsr2_motion, VK_FORMAT_R32G32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, mipLevels_fsr); + copyBufferToImage(stagingBuffer_motion, Fsr2_motion, static_cast(texWidth), static_cast(texHeight)); + //transitioned to VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL while generating mipmaps + + vkDestroyBuffer(device, stagingBuffer_motion, nullptr); + vkFreeMemory(device, stagingBufferMemory_motion, nullptr); + + //do the depth image + VkBuffer stagingBuffer_depth; + VkDeviceMemory stagingBufferMemory_depth; + createBuffer(imageSize_depth, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer_depth, stagingBufferMemory_depth); + + void* dataDepth; + vkMapMemory(device, stagingBufferMemory_depth, 0, imageSize_depth, 0, &dataDepth); + memcpy(dataDepth, data_depth.data(), static_cast(imageSize_depth)); + vkUnmapMemory(device, stagingBufferMemory_depth); + //depth image + createImage(texWidth, texHeight, mipLevels_fsr, VK_SAMPLE_COUNT_1_BIT, VK_FORMAT_D32_SFLOAT, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, Fsr2_depth, Fsr2_depthMemory); + + transitionImageLayout(Fsr2_depth, VK_FORMAT_D32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, mipLevels_fsr, VK_IMAGE_ASPECT_DEPTH_BIT); + copyBufferToImage(stagingBuffer_depth, Fsr2_depth, static_cast(texWidth), static_cast(texHeight), VK_IMAGE_ASPECT_DEPTH_BIT); + //transitioned to VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL while generating mipmaps + + vkDestroyBuffer(device, stagingBuffer_depth, nullptr); + vkFreeMemory(device, stagingBufferMemory_depth, nullptr); + + Fsr2_motion_ImageView = createImageView(Fsr2_motion, VK_FORMAT_R32G32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT, mipLevels_fsr); + Fsr2_depth_ImageView = createImageView(Fsr2_depth, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_DEPTH_BIT, mipLevels_fsr); + } + + void UpdateFSR2Image(std::string input_path, int& texWidth, int& texHeight, int& texChannels, VkImage& Fsr2_input_Image, VkDeviceMemory& Fsr2_input_ImageMemory, VkImageView& Fsr2_input_ImageView) + { + + texWidth = 2; texHeight = 2; + //mipLevels = static_cast(std::floor(std::log2(std::max(texWidth, texHeight)))) + 1; + int mipLevels_fsr = 1; + std::vector data_float = read_image_exr(input_path.c_str(), texWidth, texHeight, texChannels); + VkDeviceSize imageSize = texWidth * texHeight * sizeof(float) * texChannels; + VkBuffer stagingBuffer; + VkDeviceMemory stagingBufferMemory; + createBuffer(imageSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingBufferMemory); + + void* data; + vkMapMemory(device, stagingBufferMemory, 0, imageSize, 0, &data); + memcpy(data, data_float.data(), static_cast(imageSize)); + vkUnmapMemory(device, stagingBufferMemory); + + transitionImageLayout(Fsr2_input_Image, VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, mipLevels_fsr); + copyBufferToImage(stagingBuffer, Fsr2_input_Image, static_cast(texWidth), static_cast(texHeight)); + //transitioned to VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL while generating mipmaps + + vkDestroyBuffer(device, stagingBuffer, nullptr); + vkFreeMemory(device, stagingBufferMemory, nullptr); + + vkDestroyImageView(device, Fsr2_input_ImageView, nullptr); + + Fsr2_input_ImageView = createImageView(Fsr2_input_Image, VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT, mipLevels_fsr); + } + + void UpdateFSR2Image_MotionDepth(std::string input_path, int& texWidth, int& texHeight, int& texChannels, VkImage& Fsr2_motion, VkDeviceMemory& Fsr2_motionMemory, VkImageView& Fsr2_motion_ImageView, VkImage& Fsr2_depth, VkDeviceMemory& Fsr2_depthMemory, VkImageView& Fsr2_depthMemory_ImageView) + { + + texWidth = 2; texHeight = 2; + //mipLevels = static_cast(std::floor(std::log2(std::max(texWidth, texHeight)))) + 1; + int mipLevels_fsr = 1; + std::vector data_float = read_image_exr(input_path.c_str(), texWidth, texHeight, texChannels); + VkDeviceSize imageSize_motion = texWidth * texHeight * sizeof(float) * 2; + VkDeviceSize imageSize_depth = texWidth * texHeight * sizeof(float); + std::vector data_motion; + std::vector data_depth; + + for (int i = 0; i < texWidth * texHeight; i++) { + data_motion.push_back(scale_motion_x * data_float[texChannels * i + 0]); + data_motion.push_back(scale_motion_y * data_float[texChannels * i + 1]); + data_depth.push_back(1.0f/(scale_depth * data_float[texChannels * i + 2]+0.00001f)); + } + + VkBuffer stagingBuffer_motion; + VkDeviceMemory stagingBufferMemory_motion; + createBuffer(imageSize_motion, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer_motion, stagingBufferMemory_motion); + + void* dataMotion; + vkMapMemory(device, stagingBufferMemory_motion, 0, imageSize_motion, 0, &dataMotion); + memcpy(dataMotion, data_motion.data(), static_cast(imageSize_motion)); + vkUnmapMemory(device, stagingBufferMemory_motion); + //motion image + + transitionImageLayout(Fsr2_motion, VK_FORMAT_R32G32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, mipLevels_fsr); + copyBufferToImage(stagingBuffer_motion, Fsr2_motion, static_cast(texWidth), static_cast(texHeight)); + //transitioned to VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL while generating mipmaps + + vkDestroyBuffer(device, stagingBuffer_motion, nullptr); + vkFreeMemory(device, stagingBufferMemory_motion, nullptr); + + //do the depth image + VkBuffer stagingBuffer_depth; + VkDeviceMemory stagingBufferMemory_depth; + createBuffer(imageSize_depth, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer_depth, stagingBufferMemory_depth); + + void* dataDepth; + vkMapMemory(device, stagingBufferMemory_depth, 0, imageSize_depth, 0, &dataDepth); + memcpy(dataDepth, data_depth.data(), static_cast(imageSize_depth)); + vkUnmapMemory(device, stagingBufferMemory_depth); + //depth image + + transitionImageLayout(Fsr2_depth, VK_FORMAT_D32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, mipLevels_fsr, VK_IMAGE_ASPECT_DEPTH_BIT); + copyBufferToImage(stagingBuffer_depth, Fsr2_depth, static_cast(texWidth), static_cast(texHeight), VK_IMAGE_ASPECT_DEPTH_BIT); + //transitioned to VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL while generating mipmaps + + vkDestroyBuffer(device, stagingBuffer_depth, nullptr); + vkFreeMemory(device, stagingBufferMemory_depth, nullptr); + + vkDestroyImageView(device, Fsr2_motion_ImageView, nullptr); + vkDestroyImageView(device, Fsr2_depth_ImageView, nullptr); + + Fsr2_motion_ImageView = createImageView(Fsr2_motion, VK_FORMAT_R32G32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT, mipLevels_fsr); + Fsr2_depth_ImageView = createImageView(Fsr2_depth, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_DEPTH_BIT, mipLevels_fsr); + } + + void DownloadFSR2Image(std::string &index, std::string &output_path, int& texWidth, int& texHeight, int& texChannels, VkImage& Fsr2_input_Image, VkDeviceMemory& Fsr2_input_ImageMemory) + { + VkDeviceSize imageSize = texWidth * texHeight * sizeof(float) * texChannels; + //mipLevels = static_cast(std::floor(std::log2(std::max(texWidth, texHeight)))) + 1; + int mipLevels_fsr = 1; + std::vector data_float; + data_float.resize(texWidth * texHeight * texChannels); + VkBuffer stagingBuffer; + VkDeviceMemory stagingBufferMemory; + createBuffer(imageSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingBufferMemory); + + transitionImageLayout(Fsr2_input_Image, VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, mipLevels_fsr); + copyImageToBuffer(stagingBuffer, Fsr2_input_Image, static_cast(texWidth), static_cast(texHeight)); + //transitioned to VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL while generating mipmaps + + //copy data from buffer + void* data; + vkMapMemory(device, stagingBufferMemory, 0, imageSize, 0, &data); + memcpy(data_float.data(), data, static_cast(imageSize)); + vkUnmapMemory(device, stagingBufferMemory); + + vkDestroyBuffer(device, stagingBuffer, nullptr); + vkFreeMemory(device, stagingBufferMemory, nullptr); + + //work with the vector + std::string file_name = "result_" + index + ".exr"; + std::string output_exr = output_path + file_name; + + write_exr(output_exr, data_float, texWidth, texHeight, texChannels, false); + } + void Fsr2_work_init(uint32_t renderWidth, + uint32_t renderHeight, + uint32_t displayWidth, + uint32_t displayHeight, + bool hdr, + bool InvertedDepth) + { + const size_t scratchBufferSize = ffxFsr2GetScratchMemorySizeVK(physicalDevice); + void* scratchBuffer = malloc(scratchBufferSize); + FfxErrorCode errorCode = ffxFsr2GetInterfaceVK(&initializationParameters.callbacks, scratchBuffer, scratchBufferSize, physicalDevice, vkGetDeviceProcAddr); + assert(errorCode == FFX_OK); + + initializationParameters.device = ffxGetDeviceVK(device); + initializationParameters.maxRenderSize.width = renderWidth; + initializationParameters.maxRenderSize.height = renderHeight; + initializationParameters.displaySize.width = displayWidth; + initializationParameters.displaySize.height = displayHeight; + initializationParameters.flags = FFX_FSR2_ENABLE_AUTO_EXPOSURE; + + if (InvertedDepth) { + initializationParameters.flags |= FFX_FSR2_ENABLE_DEPTH_INVERTED; + } + + if (hdr) { + initializationParameters.flags |= FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE; + } + + ffxFsr2ContextCreate(&context, &initializationParameters); + } + void Fsr2_work_OnRender(uint32_t index,uint32_t renderWidth, uint32_t renderHeight, uint32_t displayWidth, uint32_t displayHeight) + { + VkCommandBuffer commandBuffer = beginSingleTimeCommands(); + + FfxFsr2DispatchDescription dispatchParameters = {}; + dispatchParameters.commandList = ffxGetCommandListVK(commandBuffer); + dispatchParameters.color = ffxGetTextureResourceVK(&context, Fsr2_color_Image, Fsr2_color_ImageView, renderWidth, renderHeight, VK_FORMAT_R32G32B32A32_SFLOAT, L"FSR2_InputColor"); + dispatchParameters.depth = ffxGetTextureResourceVK(&context, Fsr2_depth_Image, Fsr2_depth_ImageView, renderWidth, renderHeight, VK_FORMAT_D32_SFLOAT, L"FSR2_InputDepth"); + dispatchParameters.motionVectors = ffxGetTextureResourceVK(&context, Fsr2_motionVectors_Image, Fsr2_motionVectors_ImageView, renderWidth, renderHeight, VK_FORMAT_R32G32_SFLOAT, L"FSR2_InputMotionVectors"); + dispatchParameters.exposure = ffxGetTextureResourceVK(&context, nullptr, nullptr, 1, 1, VK_FORMAT_UNDEFINED, L"FSR2_InputExposure"); + + //no reactive + dispatchParameters.reactive = ffxGetTextureResourceVK(&context, nullptr, nullptr, 1, 1, VK_FORMAT_UNDEFINED, L"FSR2_EmptyInputReactiveMap"); + //no transparency and compositon + dispatchParameters.transparencyAndComposition = ffxGetTextureResourceVK(&context, nullptr, nullptr, 1, 1, VK_FORMAT_UNDEFINED, L"FSR2_EmptyTransparencyAndCompositionMap"); + + float JitterX = 0.0f; + float JitterY = 0.0f; + + if (enableJitter) + { + const int32_t jitterPhaseCount = ffxFsr2GetJitterPhaseCount(renderWidth, displayWidth); + ffxFsr2GetJitterOffset(&JitterX, &JitterY, index, jitterPhaseCount); + } + + dispatchParameters.output = ffxGetTextureResourceVK(&context, Fsr2_out_Image, Fsr2_out_ImageView, displayWidth, displayHeight, VK_FORMAT_R32G32B32A32_SFLOAT, L"FSR2_OutputUpscaledColor", FFX_RESOURCE_STATE_UNORDERED_ACCESS); + dispatchParameters.jitterOffset.x = JitterX; + dispatchParameters.jitterOffset.y = JitterY; + dispatchParameters.motionVectorScale.x = (float)renderWidth; + dispatchParameters.motionVectorScale.y = (float)renderHeight; + dispatchParameters.reset = reset; + dispatchParameters.enableSharpening = false; + dispatchParameters.sharpness = 0.0f; + dispatchParameters.frameTimeDelta = (float)deltaTime; + dispatchParameters.preExposure = 1.0f; + dispatchParameters.renderSize.width = renderWidth; + dispatchParameters.renderSize.height = renderHeight; + dispatchParameters.cameraFar = 0.01f; + dispatchParameters.cameraNear = 1e8; + dispatchParameters.cameraFovAngleVertical = 1.56f;//fov 90กใ + + + FfxErrorCode errorCode = ffxFsr2ContextDispatch(&context, &dispatchParameters); + assert(errorCode == FFX_OK); + + endSingleTimeCommands(commandBuffer); + } + void Fsr2_work(std::string &output_path, std::vector &filenames,int index, std::vector& color_list, std::vector& motionVector_list, + uint32_t renderWidth, + uint32_t renderHeight, + uint32_t displayWidth, + uint32_t displayHeight, + int outChannels, + bool hdr, + bool InvertedDepth) + { + Fsr2_work_init(renderWidth, renderHeight, displayWidth, displayHeight, hdr, InvertedDepth); + for (int i = index; i <= color_list.size(); i++) + { + clock_t cost_time = clock(); + Fsr2_work_OnRender(i,renderWidth, renderHeight, displayWidth, displayHeight); + clock_t render_cost_time = clock() - cost_time; + + int width = displayWidth; + int height = displayHeight; + std::string file_name = filenames[i - 1]; + int suffix_pos = file_name.find(".exr"); + if (suffix_pos != file_name.npos) + { + file_name = file_name.substr(0, suffix_pos); + } + + DownloadFSR2Image(file_name, output_path, width, height, outChannels, Fsr2_out_Image, Fsr2_out_ImageMemory); + + cost_time = clock() - (render_cost_time + cost_time); + printf("%s ",filenames[i - 1].c_str()); + printf("(%d / %d) done. render cost %f s, download image cost %f s.\n", i, (int)color_list.size(), (float)((float)render_cost_time / CLOCKS_PER_SEC), (float)((float)cost_time / CLOCKS_PER_SEC)); + + if (i == color_list.size()) break; + + int texWidth, texHeight, texChannels = 4; + UpdateFSR2Image(color_list[i], texWidth, texHeight, texChannels, Fsr2_color_Image, Fsr2_color_ImageMemory, Fsr2_color_ImageView); + int texWidth_, texHeight_, texChannels_ = 4; + UpdateFSR2Image_MotionDepth(motionVector_list[i], texWidth_, texHeight_, texChannels_, + Fsr2_motionVectors_Image, Fsr2_motionVectors_ImageMemory, Fsr2_motionVectors_ImageView, + Fsr2_depth_Image, Fsr2_depth_ImageMemory, Fsr2_depth_ImageView); + + + } + } + + void FSR2(config& config_) + { + std::vector source_list_color; + std::vector source_list_motionDepth; + for (int i = 0; i < config_.file_list.size(); i++) + { + std::string file_name = config_.file_list[i]; + std::string color_path = config_.input_path + file_name; + std::string MotionDepth_path = config_.motion_depth_path + file_name; + source_list_color.push_back(color_path); + source_list_motionDepth.push_back(MotionDepth_path); + } + scale_motion_x = config_.scale_motion_x; + scale_motion_y = config_.scale_motion_y; + scale_depth = config_.scale_depth; + deltaTime = config_.deltaTime; + + printf("hello,I'm ready to go!\n"); + int texWidth, texHeight; + int texChannels = 4;//TODO: support 3 channels + CreateFSR2Image(source_list_color[0], texWidth, texHeight, texChannels, Fsr2_color_Image, Fsr2_color_ImageMemory, Fsr2_color_ImageView); + int texWidth_, texHeight_, texChannels_ = 4; + CreateFSR2Image_MotionDepth(source_list_motionDepth[0], texWidth_, texHeight_, texChannels_, + Fsr2_motionVectors_Image, Fsr2_motionVectors_ImageMemory, Fsr2_motionVectors_ImageView, + Fsr2_depth_Image, Fsr2_depth_ImageMemory, Fsr2_depth_ImageView); + + //Let's do it! + int outWidth, outHeight, outChannels; + outWidth = config_.outWidth; + outHeight = config_.outHeight; + outChannels = texChannels; + enableJitter = config_.enableJitter; + reset = config_.reset; + + //create output result + createImage(outWidth, outHeight, 1, VK_SAMPLE_COUNT_1_BIT, VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, Fsr2_out_Image, Fsr2_out_ImageMemory); + Fsr2_out_ImageView = createImageView(Fsr2_out_Image, VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT, 1); + + Fsr2_work(config_.out_path, config_.file_list, 1, source_list_color, source_list_motionDepth, texWidth, texHeight, outWidth, outHeight, outChannels, true, false); + + printf("I am done.\n"); + } +}; diff --git a/src/VK/offscreen/helper.hpp b/src/VK/offscreen/helper.hpp new file mode 100644 index 0000000..3963265 --- /dev/null +++ b/src/VK/offscreen/helper.hpp @@ -0,0 +1,123 @@ +#pragma once +#define TINYEXR_IMPLEMENTATION +#include +#include +#include + +struct config +{ + std::vector file_list; + std::string input_path = ""; + std::string motion_depth_path = ""; + std::string out_path = ""; + float scale_motion_x = 1.0f; + float scale_motion_y = 1.0f; + float scale_depth = 1.0f; + double deltaTime = 33.3f; + int outWidth = 1; + int outHeight = 1; + bool reset = false; + bool enableJitter = false; +}; + +bool get_files(std::vector& fileNames, std::string& input_path) +{ + FindFiles finder; + fileNames = finder.findFiles(input_path.c_str()); + if (fileNames.empty()) return false; + std::sort(fileNames.begin(), fileNames.end()); + return true; +} + +std::vector read_image_exr(const std::string& file_path, int& w, int& h,int channel) +{ + float* data = nullptr; + const char* err = nullptr; + int ret = LoadEXR(&data, &w, &h, file_path.c_str(), &err); + if (ret != TINYEXR_SUCCESS) { + if (err) { + std::cout << "EXR reading error: " << err << "\n"; + FreeEXRErrorMessage(err); + } + } + std::vector data_rgba{ data ,data+(w*h)*channel}; // auto free + return data_rgba; +} + +void write_exr(const std::string& _file_path, std::vector &data, int w, int h, int _channels=4, + bool use_half = false) +{ + if (data.empty()) + return; + std::string file_path = _file_path; + + EXRHeader header; + InitEXRHeader(&header); + + EXRImage image; + InitEXRImage(&image); + + image.num_channels = _channels; + + int pixel_n = w * h; + std::vector images[4]; + images[0].resize(pixel_n); + images[1].resize(pixel_n); + images[2].resize(pixel_n); + images[3].resize(pixel_n); + + for (int i = 0; i < pixel_n; i++) { + images[0][i] = data[4 * i + 0]; + images[1][i] = data[4 * i + 1]; + images[2][i] = data[4 * i + 2]; + images[3][i] = data[4 * i + 3]; + } + + float* image_ptr[4]; + image_ptr[0] = images[3].data(); // A + image_ptr[1] = images[2].data(); // B + image_ptr[2] = images[1].data(); // G + image_ptr[3] = images[0].data(); // R + + + image.images = (uint8_t**)image_ptr; + image.width = w; + image.height = h; + + header.num_channels = 4; + auto channels = std::make_unique(header.num_channels); + header.channels = channels.get(); + // Must be (A)BGR order, since most of EXR viewers expect this channel order + strncpy_s(header.channels[0].name, "A", 255); + header.channels[0].name[strlen("A")] = '\0'; + strncpy_s(header.channels[1].name, "B", 255); + header.channels[1].name[strlen("B")] = '\0'; + strncpy_s(header.channels[2].name, "G", 255); + header.channels[2].name[strlen("G")] = '\0'; + strncpy_s(header.channels[3].name, "R", 255); + header.channels[3].name[strlen("R")] = '\0'; + + auto pixel_types = std::make_unique(header.num_channels); + header.pixel_types = pixel_types.get(); + auto requested_pixel_types = std::make_unique(header.num_channels); + header.requested_pixel_types = requested_pixel_types.get(); + for (int i = 0; i < header.num_channels; i++) { + header.pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; // pixel type of input image + header.requested_pixel_types[i] = + use_half ? // pixel type of output image to be stored in EXR file + TINYEXR_PIXELTYPE_HALF : + TINYEXR_PIXELTYPE_FLOAT; + } + + header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP; + + const char* err = nullptr; + int ret = SaveEXRImageToFile(&image, &header, file_path.c_str(), &err); + if (ret != TINYEXR_SUCCESS) { + if (err) { + std::cout << "EXR writing error: " << err << std::endl; + FreeEXRErrorMessage(err); + } + return; + }; +} \ No newline at end of file diff --git a/src/VK/offscreen/include/FindFiles.h b/src/VK/offscreen/include/FindFiles.h new file mode 100644 index 0000000..53daae1 --- /dev/null +++ b/src/VK/offscreen/include/FindFiles.h @@ -0,0 +1,143 @@ +/* + * Find and generate a file list of the folder. +**/ + +#ifndef FIND_FILES_H +#define FIND_FILES_H + +#include +#include + +class FindFiles +{ +public: + FindFiles(){} + ~FindFiles(){} + + std::vector findFiles( const char *lpPath, const char *secName = ".*" ); + +private: + std::vector file_lists; +}; + +#include +#include +#include +#include + +#ifdef WIN32 + +#include +#include + +std::vector FindFiles::findFiles( const char *lpPath, const char *secName /*= ".*" */ ) +{ + char szFind[MAX_PATH]; + char szFile[MAX_PATH]; + + WIN32_FIND_DATA FindFileData; + + strcpy(szFind,lpPath); + strcat(szFind,"\\*"); + strcat(szFind,secName); + + HANDLE hFind=::FindFirstFile(szFind,&FindFileData); + + if(INVALID_HANDLE_VALUE == hFind) + { + std::cout << "Empty folder!" << std::endl; + return std::vector(); + } + + do + { + if(FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + { + if(FindFileData.cFileName[0]!='.') + { + strcpy(szFile,lpPath); + strcat(szFile,"\\"); + strcat(szFile,FindFileData.cFileName); + findFiles(szFile); + } + } + else + { + if ( szFile[0] ) + { + std::string filePath = lpPath; + filePath += "\\"; + filePath = FindFileData.cFileName; + file_lists.push_back(filePath); + } + else + { + std::string filePath = szFile; + filePath = FindFileData.cFileName; + file_lists.push_back(filePath); + } + } + + }while(::FindNextFile(hFind,&FindFileData)); + + ::FindClose(hFind); + return file_lists; +} + +#else + +#include +#include +#include + +std::vector FindFiles::findFiles( const char *lpPath, const char *secName /*= ".*" */ ) +{ + (void)secName; + + std::vector result; + std::queue queue; + std::string dirname; + + DIR *dir; + if ( !(dir = opendir ( lpPath )) ) { + return result; + } + queue.push( lpPath ); + + struct dirent *ent; + while ( !queue.empty() ) + { + + dirname = queue.front(); + dir = opendir( dirname.c_str() ); + queue.pop(); + if ( !dir ) { continue; } + + while( ent = readdir( dir ) ) + { + + if ( strcmp(".", ent->d_name) == 0 || strcmp("..", ent->d_name) == 0 ) + { + continue; + } + if ( ent->d_type == DT_DIR ) + { + queue.push( dirname+"/"+ent->d_name ); + } + else + { + result.push_back( /*dirname + "/" + */ent->d_name); + } + + } + + closedir( dir ); + + } + + return result; +} + +#endif + +#endif // FIND_FILES_H diff --git a/src/VK/offscreen/include/stb_image.h b/src/VK/offscreen/include/stb_image.h new file mode 100644 index 0000000..1b7e7b0 --- /dev/null +++ b/src/VK/offscreen/include/stb_image.h @@ -0,0 +1,7763 @@ +/* stb_image - v2.26 - public domain image loader - http://nothings.org/stb + no warranty implied; use at your own risk + + Do this: + #define STB_IMAGE_IMPLEMENTATION + before you include this file in *one* C or C++ file to create the implementation. + + // i.e. it should look like this: + #include ... + #include ... + #include ... + #define STB_IMAGE_IMPLEMENTATION + #include "stb_image.h" + + You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. + And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) + + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 + + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + + Full documentation under "DOCUMENTATION" below. + + +LICENSE + + See end of file for license information. + +RECENT REVISION HISTORY: + + 2.26 (2020-07-13) many minor fixes + 2.25 (2020-02-02) fix warnings + 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically + 2.23 (2019-08-11) fix clang static analysis warning + 2.22 (2019-03-04) gif fixes, fix warnings + 2.21 (2019-02-25) fix typo in comment + 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + + See end of file for full revision history. + + + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine + John-Mark Allen + Carmelo J Fdez-Aguera + + Bug & warning fixes + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski + Phil Jordan Dave Moore Roy Eltham + Hayaki Saito Nathan Reed Won Chun + Luke Graham Johan Duparc Nick Verigakis the Horde3D community + Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Laurent Gomila Cort Stratton github:snagar + Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex + Cass Everitt Ryamond Barbiero github:grim210 + Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw + Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus + Josh Tobin Matthew Gregan github:poppolopoppo + Julian Raschke Gregory Mullen Christian Floisand github:darealshinji + Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007 + Brad Weinberger Matvey Cherevko [reserved] + Luca Sas Alexander Veselov Zack Middleton [reserved] + Ryan C. Gordon [reserved] [reserved] + DO NOT ADD YOUR NAME HERE + + To add your name to the credits, pick a random blank space in the middle and fill it. + 80% of merge conflicts on stb PRs are due to people adding their name at the end + of the credits. +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below for HDR usage): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// =========================================================================== +// +// UNICODE: +// +// If compiling for Windows and you wish to use Unicode filenames, compile +// with +// #define STBI_WINDOWS_UTF8 +// and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert +// Windows wchar_t filenames to utf8. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +// 1. easy to use +// 2. easy to maintain +// 3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy-to-use ones. Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. +// +// Some secondary priorities arise directly from the first two, some of which +// provide more explicit reasons why performance can't be emphasized. +// +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). +// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. +// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image supports loading HDR images in general, and currently the Radiance +// .HDR file format specifically. You can still load any file through the existing +// interface; if you attempt to load an HDR file, it will be automatically remapped +// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). +// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// By default we convert iphone-formatted PNGs back to RGB, even though +// they are internally encoded differently. You can disable this conversion +// by calling stbi_convert_iphone_png_to_rgb(0), in which case +// you will always just get the native iphone "format" through (which +// is BGR stored in RGB). +// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. +// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// +// - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater +// than that size (in either width or height) without further processing. +// This is to let programs in the wild set an upper bound to prevent +// denial-of-service attacks on untrusted data, as one could generate a +// valid image of gigantic dimensions and force stb_image to allocate a +// huge block of memory and spend disproportionate time decoding it. By +// default this is set to (1 << 24), which is 16777216, but that's still +// very big. + +#ifndef STBI_NO_STDIO +#include +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for desired_channels + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +#include +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef STBIDEF +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ + int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. return number of bytes actually read + void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + int (*eof) (void *user); // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + +#ifdef STBI_WINDOWS_UTF8 +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + + #ifndef STBI_NO_STDIO + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); + #endif +#endif + +#ifndef STBI_NO_HDR + STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); + STBIDEF void stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR + STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); + STBIDEF void stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// on most compilers (and ALL modern mainstream compilers) this is threadsafe +STBIDEF const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +STBIDEF void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const *filename); +STBIDEF int stbi_is_16_bit_from_file(FILE *f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// as above, but only applies to images loaded on the thread that calls the function +// this function is only available if your compiler supports thread-local variables; +// calling it will fail to link if your compiler doesn't +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ + || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ + || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ + || defined(STBI_ONLY_ZLIB) + #ifndef STBI_ONLY_JPEG + #define STBI_NO_JPEG + #endif + #ifndef STBI_ONLY_PNG + #define STBI_NO_PNG + #endif + #ifndef STBI_ONLY_BMP + #define STBI_NO_BMP + #endif + #ifndef STBI_ONLY_PSD + #define STBI_NO_PSD + #endif + #ifndef STBI_ONLY_TGA + #define STBI_NO_TGA + #endif + #ifndef STBI_ONLY_GIF + #define STBI_NO_GIF + #endif + #ifndef STBI_ONLY_HDR + #define STBI_NO_HDR + #endif + #ifndef STBI_ONLY_PIC + #define STBI_NO_PIC + #endif + #ifndef STBI_ONLY_PNM + #define STBI_NO_PNM + #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include +#include // ptrdiff_t on osx +#include +#include +#include + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include +#endif + +#ifndef STBI_ASSERT +#include +#define STBI_ASSERT(x) assert(x) +#endif + +#ifdef __cplusplus +#define STBI_EXTERN extern "C" +#else +#define STBI_EXTERN extern +#endif + + +#ifndef _MSC_VER + #ifdef __cplusplus + #define stbi_inline inline + #else + #define stbi_inline + #endif +#else + #define stbi_inline __forceinline +#endif + +#ifndef STBI_NO_THREAD_LOCALS + #if defined(__cplusplus) && __cplusplus >= 201103L + #define STBI_THREAD_LOCAL thread_local + #elif defined(__GNUC__) && __GNUC__ < 5 + #define STBI_THREAD_LOCAL __thread + #elif defined(_MSC_VER) + #define STBI_THREAD_LOCAL __declspec(thread) + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__) + #define STBI_THREAD_LOCAL _Thread_local + #endif + + #ifndef STBI_THREAD_LOCAL + #if defined(__GNUC__) + #define STBI_THREAD_LOCAL __thread + #endif + #endif +#endif + +#ifdef _MSC_VER +typedef unsigned short stbi__uint16; +typedef signed short stbi__int16; +typedef unsigned int stbi__uint32; +typedef signed int stbi__int32; +#else +#include +typedef uint16_t stbi__uint16; +typedef int16_t stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v) (void)(v) +#else +#define STBI_NOTUSED(v) (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL + #define stbi_lrot(x,y) _lrotl(x,y) +#else + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." +#endif + +#ifndef STBI_MALLOC +#define STBI_MALLOC(sz) malloc(sz) +#define STBI_REALLOC(p,newsz) realloc(p,newsz) +#define STBI_FREE(p) free(p) +#endif + +#ifndef STBI_REALLOC_SIZED +#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) +#endif + +// x86/x64 detection +#if defined(__x86_64__) || defined(_M_X64) +#define STBI__X64_TARGET +#elif defined(__i386) || defined(_M_IX86) +#define STBI__X86_TARGET +#endif + +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) +// gcc doesn't support sse2 intrinsics unless you compile with -msse2, +// which in turn means it gets to use SSE2 everywhere. This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. +#define STBI_NO_SIMD +#endif + +#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) +// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET +// +// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the +// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. +// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not +// simultaneously enabling "-mstackrealign". +// +// See https://github.com/nothings/stb/issues/81 for more information. +// +// So default to no SSE2 on 32-bit MinGW. If you've read this far and added +// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. +#define STBI_NO_SIMD +#endif + +#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) +#define STBI_SSE2 +#include + +#ifdef _MSC_VER + +#if _MSC_VER >= 1400 // not VC6 +#include // __cpuid +static int stbi__cpuid3(void) +{ + int info[4]; + __cpuid(info,1); + return info[3]; +} +#else +static int stbi__cpuid3(void) +{ + int res; + __asm { + mov eax,1 + cpuid + mov res,edx + } + return res; +} +#endif + +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name + +#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) +static int stbi__sse2_available(void) +{ + int info3 = stbi__cpuid3(); + return ((info3 >> 26) & 1) != 0; +} +#endif + +#else // assume GCC-style if not VC++ +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) + +#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) +static int stbi__sse2_available(void) +{ + // If we're even attempting to compile this on GCC/Clang, that means + // -msse2 is on, which means the compiler is allowed to use SSE2 + // instructions at will, and so are we. + return 1; +} +#endif + +#endif +#endif + +// ARM NEON +#if defined(STBI_NO_SIMD) && defined(STBI_NEON) +#undef STBI_NEON +#endif + +#ifdef STBI_NEON +#include +// assume GCC or Clang on ARM targets +#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) +#endif + +#ifndef STBI_SIMD_ALIGN +#define STBI_SIMD_ALIGN(type, name) type name +#endif + +#ifndef STBI_MAX_DIMENSIONS +#define STBI_MAX_DIMENSIONS (1 << 24) +#endif + +/////////////////////////////////////////////// +// +// stbi__context struct and start_xxx functions + +// stbi__context structure is our basic context used by all images, so it +// contains all the IO context, plus some basic image information +typedef struct +{ + stbi__uint32 img_x, img_y; + int img_n, img_out_n; + + stbi_io_callbacks io; + void *io_user_data; + + int read_from_callbacks; + int buflen; + stbi_uc buffer_start[128]; + int callback_already_read; + + stbi_uc *img_buffer, *img_buffer_end; + stbi_uc *img_buffer_original, *img_buffer_original_end; +} stbi__context; + + +static void stbi__refill_buffer(stbi__context *s); + +// initialize a memory-decode context +static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) +{ + s->io.read = NULL; + s->read_from_callbacks = 0; + s->callback_already_read = 0; + s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; + s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; +} + +// initialize a callback-based context +static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) +{ + s->io = *c; + s->io_user_data = user; + s->buflen = sizeof(s->buffer_start); + s->read_from_callbacks = 1; + s->callback_already_read = 0; + s->img_buffer = s->img_buffer_original = s->buffer_start; + stbi__refill_buffer(s); + s->img_buffer_original_end = s->img_buffer_end; +} + +#ifndef STBI_NO_STDIO + +static int stbi__stdio_read(void *user, char *data, int size) +{ + return (int) fread(data,1,size,(FILE*) user); +} + +static void stbi__stdio_skip(void *user, int n) +{ + int ch; + fseek((FILE*) user, n, SEEK_CUR); + ch = fgetc((FILE*) user); /* have to read a byte to reset feof()'s flag */ + if (ch != EOF) { + ungetc(ch, (FILE *) user); /* push byte back onto stream if valid. */ + } +} + +static int stbi__stdio_eof(void *user) +{ + return feof((FILE*) user) || ferror((FILE *) user); +} + +static stbi_io_callbacks stbi__stdio_callbacks = +{ + stbi__stdio_read, + stbi__stdio_skip, + stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ + stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ + // conceptually rewind SHOULD rewind to the beginning of the stream, + // but we just rewind to the beginning of the initial buffer, because + // we only use it after doing 'test', which only ever looks at at most 92 bytes + s->img_buffer = s->img_buffer_original; + s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ + STBI_ORDER_RGB, + STBI_ORDER_BGR +}; + +typedef struct +{ + int bits_per_channel; + int num_channels; + int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int stbi__jpeg_test(stbi__context *s); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int stbi__png_test(stbi__context *s); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int stbi__bmp_test(stbi__context *s); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int stbi__tga_test(stbi__context *s); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s); +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__psd_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_HDR +static int stbi__hdr_test(stbi__context *s); +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_test(stbi__context *s); +static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_GIF +static int stbi__gif_test(stbi__context *s); +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNM +static int stbi__pnm_test(stbi__context *s); +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +static +#ifdef STBI_THREAD_LOCAL +STBI_THREAD_LOCAL +#endif +const char *stbi__g_failure_reason; + +STBIDEF const char *stbi_failure_reason(void) +{ + return stbi__g_failure_reason; +} + +#ifndef STBI_NO_FAILURE_STRINGS +static int stbi__err(const char *str) +{ + stbi__g_failure_reason = str; + return 0; +} +#endif + +static void *stbi__malloc(size_t size) +{ + return STBI_MALLOC(size); +} + +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. +// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ + if (b < 0) return 0; + // now 0 <= b <= INT_MAX, hence also + // 0 <= INT_MAX - b <= INTMAX. + // And "a + b <= INT_MAX" (which might overflow) is the + // same as a <= INT_MAX - b (no overflow) + return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. +static int stbi__mul2sizes_valid(int a, int b) +{ + if (a < 0 || b < 0) return 0; + if (b == 0) return 1; // mul-by-0 is always safe + // portable way to check for no overflows in a*b + return a <= INT_MAX/b; +} + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} +#endif + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ + if (!stbi__mad2sizes_valid(a, b, add)) return NULL; + return stbi__malloc(a*b + add); +} +#endif + +static void *stbi__malloc_mad3(int a, int b, int c, int add) +{ + if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; + return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ + if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; + return stbi__malloc(a*b*c*d + add); +} +#endif + +// stbi__err - error +// stbi__errpf - error returning pointer to float +// stbi__errpuc - error returning pointer to unsigned char + +#ifdef STBI_NO_FAILURE_STRINGS + #define stbi__err(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define stbi__err(x,y) stbi__err(y) +#else + #define stbi__err(x,y) stbi__err(x) +#endif + +#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) +#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) + +STBIDEF void stbi_image_free(void *retval_from_stbi_load) +{ + STBI_FREE(retval_from_stbi_load); +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_HDR +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +static int stbi__vertically_flip_on_load_global = 0; + +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load_global = flag_true_if_should_flip; +} + +#ifndef STBI_THREAD_LOCAL +#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global +#else +static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set; + +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load_local = flag_true_if_should_flip; + stbi__vertically_flip_on_load_set = 1; +} + +#define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set \ + ? stbi__vertically_flip_on_load_local \ + : stbi__vertically_flip_on_load_global) +#endif // STBI_THREAD_LOCAL + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields + ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed + ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order + ri->num_channels = 0; + + #ifndef STBI_NO_JPEG + if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNG + if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_BMP + if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_GIF + if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PSD + if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); + #else + STBI_NOTUSED(bpc); + #endif + #ifndef STBI_NO_PIC + if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNM + if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); + return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + #ifndef STBI_NO_TGA + // test tga last because it's a crappy test! + if (stbi__tga_test(s)) + return stbi__tga_load(s,x,y,comp,req_comp, ri); + #endif + + return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); +} + +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi_uc *reduced; + + reduced = (stbi_uc *) stbi__malloc(img_len); + if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + + STBI_FREE(orig); + return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi__uint16 *enlarged; + + enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); + if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + + STBI_FREE(orig); + return enlarged; +} + +static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ + int row; + size_t bytes_per_row = (size_t)w * bytes_per_pixel; + stbi_uc temp[2048]; + stbi_uc *bytes = (stbi_uc *)image; + + for (row = 0; row < (h>>1); row++) { + stbi_uc *row0 = bytes + row*bytes_per_row; + stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; + // swap row0 with row1 + size_t bytes_left = bytes_per_row; + while (bytes_left) { + size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); + memcpy(temp, row0, bytes_copy); + memcpy(row0, row1, bytes_copy); + memcpy(row1, temp, bytes_copy); + row0 += bytes_copy; + row1 += bytes_copy; + bytes_left -= bytes_copy; + } + } +} + +#ifndef STBI_NO_GIF +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ + int slice; + int slice_size = w * h * bytes_per_pixel; + + stbi_uc *bytes = (stbi_uc *)image; + for (slice = 0; slice < z; ++slice) { + stbi__vertical_flip(bytes, w, h, bytes_per_pixel); + bytes += slice_size; + } +} +#endif + +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + + if (result == NULL) + return NULL; + + // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. + STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + + if (ri.bits_per_channel != 8) { + result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 8; + } + + // @TODO: move stbi__convert_format to here + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); + } + + return (unsigned char *) result; +} + +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + + if (result == NULL) + return NULL; + + // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. + STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + + if (ri.bits_per_channel != 16) { + result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 16; + } + + // @TODO: move stbi__convert_format16 to here + // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); + } + + return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR) +static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) +{ + if (stbi__vertically_flip_on_load && result != NULL) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); + } +} +#endif + +#ifndef STBI_NO_STDIO + +#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8) +STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); +STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); +#endif + +#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8) +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) +{ + return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); +} +#endif + +static FILE *stbi__fopen(char const *filename, char const *mode) +{ + FILE *f; +#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8) + wchar_t wMode[64]; + wchar_t wFilename[1024]; + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename))) + return 0; + + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode))) + return 0; + +#if _MSC_VER >= 1400 + if (0 != _wfopen_s(&f, wFilename, wMode)) + f = 0; +#else + f = _wfopen(wFilename, wMode); +#endif + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != fopen_s(&f, filename, mode)) + f=0; +#else + f = fopen(filename, mode); +#endif + return f; +} + + +STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + unsigned char *result; + if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__uint16 *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + stbi__uint16 *result; + if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file_16(f,x,y,comp,req_comp); + fclose(f); + return result; +} + + +#endif //!STBI_NO_STDIO + +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_mem(&s,buffer,len); + + result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); + if (stbi__vertically_flip_on_load) { + stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); + } + + return result; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + stbi__result_info ri; + float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); + if (hdr_data) + stbi__float_postprocess(hdr_data,x,y,comp,req_comp); + return hdr_data; + } + #endif + data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); + if (data) + return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); +} + +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + float *result; + FILE *f = stbi__fopen(filename, "rb"); + if (!f) return stbi__errpf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_file(&s,f); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} +#endif // !STBI_NO_STDIO + +#endif // !STBI_NO_LINEAR + +// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is +// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always +// reports false! + +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(buffer); + STBI_NOTUSED(len); + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +STBIDEF int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + long pos = ftell(f); + int res; + stbi__context s; + stbi__start_file(&s,f); + res = stbi__hdr_test(&s); + fseek(f, pos, SEEK_SET); + return res; + #else + STBI_NOTUSED(f); + return 0; + #endif +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(clbk); + STBI_NOTUSED(user); + return 0; + #endif +} + +#ifndef STBI_NO_LINEAR +static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; + +STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } +STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } +#endif + +static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; + +STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } +STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + STBI__SCAN_load=0, + STBI__SCAN_type, + STBI__SCAN_header +}; + +static void stbi__refill_buffer(stbi__context *s) +{ + int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); + s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original); + if (n == 0) { + // at end of file, treat same as if from memory, but need to handle case + // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file + s->read_from_callbacks = 0; + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start+1; + *s->img_buffer = 0; + } else { + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; + } +} + +stbi_inline static stbi_uc stbi__get8(stbi__context *s) +{ + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + if (s->read_from_callbacks) { + stbi__refill_buffer(s); + return *s->img_buffer++; + } + return 0; +} + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +stbi_inline static int stbi__at_eof(stbi__context *s) +{ + if (s->io.read) { + if (!(s->io.eof)(s->io_user_data)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->read_from_callbacks == 0) return 1; + } + + return s->img_buffer >= s->img_buffer_end; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) +// nothing +#else +static void stbi__skip(stbi__context *s, int n) +{ + if (n == 0) return; // already there! + if (n < 0) { + s->img_buffer = s->img_buffer_end; + return; + } + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + s->img_buffer = s->img_buffer_end; + (s->io.skip)(s->io_user_data, n - blen); + return; + } + } + s->img_buffer += n; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM) +// nothing +#else +static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) +{ + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + int res, count; + + memcpy(buffer, s->img_buffer, blen); + + count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); + res = (count == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } + } + + if (s->img_buffer+n <= s->img_buffer_end) { + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; + return 1; + } else + return 0; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static int stbi__get16be(stbi__context *s) +{ + int z = stbi__get8(s); + return (z << 8) + stbi__get8(s); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static stbi__uint32 stbi__get32be(stbi__context *s) +{ + stbi__uint32 z = stbi__get16be(s); + return (z << 16) + stbi__get16be(s); +} +#endif + +#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ + int z = stbi__get8(s); + return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ + stbi__uint32 z = stbi__get16le(s); + return z + (stbi__get16le(s) << 16); +} +#endif + +#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ + return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); + if (good == NULL) { + STBI_FREE(data); + return stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; + default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion"); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ + return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + stbi__uint16 *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); + if (good == NULL) { + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; + default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion"); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output; + if (!data) return NULL; + output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); + } + } + if (n < comp) { + for (i=0; i < x*y; ++i) { + output[i*comp + n] = data[i*comp + n]/255.0f; + } + } + STBI_FREE(data); + return output; +} +#endif + +#ifndef STBI_NO_HDR +#define stbi__float2int(x) ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output; + if (!data) return NULL; + output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + } + STBI_FREE(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder +// +// simple implementation +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - some SIMD kernels for common paths on targets with SSE2/NEON +// - uses a lot of intermediate memory, could cache poorly + +#ifndef STBI_NO_JPEG + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + stbi_uc fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + stbi__uint16 code[256]; + stbi_uc values[256]; + stbi_uc size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} stbi__huffman; + +typedef struct +{ + stbi__context *s; + stbi__huffman huff_dc[4]; + stbi__huffman huff_ac[4]; + stbi__uint16 dequant[4][64]; + stbi__int16 fast_ac[4][1 << FAST_BITS]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + stbi_uc *data; + void *raw_data, *raw_coeff; + stbi_uc *linebuf; + short *coeff; // progressive only + int coeff_w, coeff_h; // number of 8x8 coefficient blocks + } img_comp[4]; + + stbi__uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int progressive; + int spec_start; + int spec_end; + int succ_high; + int succ_low; + int eob_run; + int jfif; + int app14_color_transform; // Adobe APP14 tag + int rgb; + + int scan_n, order[4]; + int restart_interval, todo; + +// kernels + void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); + void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); + stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); +} stbi__jpeg; + +static int stbi__build_huffman(stbi__huffman *h, int *count) +{ + int i,j,k=0; + unsigned int code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + h->size[k++] = (stbi_uc) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (stbi__uint16) (code++); + if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (stbi_uc) i; + } + } + } + return 1; +} + +// build a table that decodes both magnitude and value of small ACs in +// one go. +static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) +{ + int i; + for (i=0; i < (1 << FAST_BITS); ++i) { + stbi_uc fast = h->fast[i]; + fast_ac[i] = 0; + if (fast < 255) { + int rs = h->values[fast]; + int run = (rs >> 4) & 15; + int magbits = rs & 15; + int len = h->size[fast]; + + if (magbits && len + magbits <= FAST_BITS) { + // magnitude code followed by receive_extend code + int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); + int m = 1 << (magbits - 1); + if (k < m) k += (~0U << magbits) + 1; + // if the result is small enough, we can fit it in fast_ac table + if (k >= -128 && k <= 127) + fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); + } + } + } +} + +static void stbi__grow_buffer_unsafe(stbi__jpeg *j) +{ + do { + unsigned int b = j->nomore ? 0 : stbi__get8(j->s); + if (b == 0xff) { + int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + j->code_buffer <<= k; + return h->values[c]; +} + +// bias[n] = (-1<code_bits < n) stbi__grow_buffer_unsafe(j); + + sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB + k = stbi_lrot(j->code_buffer, n); + if (n < 0 || n >= (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))) return 0; + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k + (stbi__jbias[n] & ~sgn); +} + +// get some unsigned bits +stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) +{ + unsigned int k; + if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~stbi__bmask[n]; + k &= stbi__bmask[n]; + j->code_bits -= n; + return k; +} + +stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) +{ + unsigned int k; + if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); + k = j->code_buffer; + j->code_buffer <<= 1; + --j->code_bits; + return k & 0x80000000; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? +static const stbi_uc stbi__jpeg_dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) +{ + int diff,dc,k; + int t; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? stbi__extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc * dequant[0]); + + // decode AC components, see JPEG spec + k = 1; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * dequant[zig]); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); + } + } + } while (k < 64); + return 1; +} + +static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ + int diff,dc; + int t; + if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + if (j->succ_high == 0) { + // first scan for DC coefficient, must be first + memset(data,0,64*sizeof(data[0])); // 0 all the ac values now + t = stbi__jpeg_huff_decode(j, hdc); + if (t == -1) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + diff = t ? stbi__extend_receive(j, t) : 0; + + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc << j->succ_low); + } else { + // refinement scan for DC coefficient + if (stbi__jpeg_get_bit(j)) + data[0] += (short) (1 << j->succ_low); + } + return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ + int k; + if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->succ_high == 0) { + int shift = j->succ_low; + + if (j->eob_run) { + --j->eob_run; + return 1; + } + + k = j->spec_start; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) << shift); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r); + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + --j->eob_run; + break; + } + k += 16; + } else { + k += r; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) << shift); + } + } + } while (k <= j->spec_end); + } else { + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } + } + return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (stbi_uc) x; +} + +#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x) ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = (p2+p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3*stbi__f2f(-1.847759065f); \ + t3 = p1 + p2*stbi__f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2+p3); \ + t1 = stbi__fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ + t0 = t0*stbi__f2f( 0.298631336f); \ + t1 = t1*stbi__f2f( 2.053119869f); \ + t2 = t2*stbi__f2f( 3.072711026f); \ + t3 = t3*stbi__f2f( 1.501321110f); \ + p1 = p5 + p1*stbi__f2f(-0.899976223f); \ + p2 = p5 + p2*stbi__f2f(-2.562915447f); \ + p3 = p3*stbi__f2f(-1.961570560f); \ + p4 = p4*stbi__f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ + int i,val[64],*v=val; + stbi_uc *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0]*4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0+t3) >> 17); + o[7] = stbi__clamp((x0-t3) >> 17); + o[1] = stbi__clamp((x1+t2) >> 17); + o[6] = stbi__clamp((x1-t2) >> 17); + o[2] = stbi__clamp((x2+t1) >> 17); + o[5] = stbi__clamp((x2-t1) >> 17); + o[3] = stbi__clamp((x3+t0) >> 17); + o[4] = stbi__clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + // This is constructed to match our regular (generic) integer IDCT exactly. + __m128i row0, row1, row2, row3, row4, row5, row6, row7; + __m128i tmp; + + // dot product constant: even elems=x, odd elems=y + #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) + + // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) + // out(1) = c1[even]*x + c1[odd]*y + #define dct_rot(out0,out1, x,y,c0,c1) \ + __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ + __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ + __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ + __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ + __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ + __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + + // out = in << 12 (in 16-bit, out 32-bit) + #define dct_widen(out, in) \ + __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ + __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + + // wide add + #define dct_wadd(out, a, b) \ + __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + + // wide sub + #define dct_wsub(out, a, b) \ + __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + + // butterfly a/b, add bias, then shift by "s" and pack + #define dct_bfly32o(out0, out1, a,b,bias,s) \ + { \ + __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ + __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ + dct_wadd(sum, abiased, b); \ + dct_wsub(dif, abiased, b); \ + out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ + out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ + } + + // 8-bit interleave step (for transposes) + #define dct_interleave8(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi8(a, b); \ + b = _mm_unpackhi_epi8(tmp, b) + + // 16-bit interleave step (for transposes) + #define dct_interleave16(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi16(a, b); \ + b = _mm_unpackhi_epi16(tmp, b) + + #define dct_pass(bias,shift) \ + { \ + /* even part */ \ + dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ + __m128i sum04 = _mm_add_epi16(row0, row4); \ + __m128i dif04 = _mm_sub_epi16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ + dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ + __m128i sum17 = _mm_add_epi16(row1, row7); \ + __m128i sum35 = _mm_add_epi16(row3, row5); \ + dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ + dct_wadd(x4, y0o, y4o); \ + dct_wadd(x5, y1o, y5o); \ + dct_wadd(x6, y2o, y5o); \ + dct_wadd(x7, y3o, y4o); \ + dct_bfly32o(row0,row7, x0,x7,bias,shift); \ + dct_bfly32o(row1,row6, x1,x6,bias,shift); \ + dct_bfly32o(row2,row5, x2,x5,bias,shift); \ + dct_bfly32o(row3,row4, x3,x4,bias,shift); \ + } + + __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); + __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); + __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); + __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); + __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); + __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); + __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); + __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); + + // rounding biases in column/row passes, see stbi__idct_block for explanation. + __m128i bias_0 = _mm_set1_epi32(512); + __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); + + // load + row0 = _mm_load_si128((const __m128i *) (data + 0*8)); + row1 = _mm_load_si128((const __m128i *) (data + 1*8)); + row2 = _mm_load_si128((const __m128i *) (data + 2*8)); + row3 = _mm_load_si128((const __m128i *) (data + 3*8)); + row4 = _mm_load_si128((const __m128i *) (data + 4*8)); + row5 = _mm_load_si128((const __m128i *) (data + 5*8)); + row6 = _mm_load_si128((const __m128i *) (data + 6*8)); + row7 = _mm_load_si128((const __m128i *) (data + 7*8)); + + // column pass + dct_pass(bias_0, 10); + + { + // 16bit 8x8 transpose pass 1 + dct_interleave16(row0, row4); + dct_interleave16(row1, row5); + dct_interleave16(row2, row6); + dct_interleave16(row3, row7); + + // transpose pass 2 + dct_interleave16(row0, row2); + dct_interleave16(row1, row3); + dct_interleave16(row4, row6); + dct_interleave16(row5, row7); + + // transpose pass 3 + dct_interleave16(row0, row1); + dct_interleave16(row2, row3); + dct_interleave16(row4, row5); + dct_interleave16(row6, row7); + } + + // row pass + dct_pass(bias_1, 17); + + { + // pack + __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 + __m128i p1 = _mm_packus_epi16(row2, row3); + __m128i p2 = _mm_packus_epi16(row4, row5); + __m128i p3 = _mm_packus_epi16(row6, row7); + + // 8bit 8x8 transpose pass 1 + dct_interleave8(p0, p2); // a0e0a1e1... + dct_interleave8(p1, p3); // c0g0c1g1... + + // transpose pass 2 + dct_interleave8(p0, p1); // a0c0e0g0... + dct_interleave8(p2, p3); // b0d0f0h0... + + // transpose pass 3 + dct_interleave8(p0, p2); // a0b0c0d0... + dct_interleave8(p1, p3); // a4b4c4d4... + + // store + _mm_storel_epi64((__m128i *) out, p0); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p2); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p1); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p3); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); + } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + + int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); + int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); + int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); + int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); + int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); + int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); + int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); + int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); + int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); + int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); + int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); + int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ + int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ + int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ + int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ + int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ + int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ + int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ + { \ + dct_wadd(sum, a, b); \ + dct_wsub(dif, a, b); \ + out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ + out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ + } + +#define dct_pass(shiftop, shift) \ + { \ + /* even part */ \ + int16x8_t sum26 = vaddq_s16(row2, row6); \ + dct_long_mul(p1e, sum26, rot0_0); \ + dct_long_mac(t2e, p1e, row6, rot0_1); \ + dct_long_mac(t3e, p1e, row2, rot0_2); \ + int16x8_t sum04 = vaddq_s16(row0, row4); \ + int16x8_t dif04 = vsubq_s16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + int16x8_t sum15 = vaddq_s16(row1, row5); \ + int16x8_t sum17 = vaddq_s16(row1, row7); \ + int16x8_t sum35 = vaddq_s16(row3, row5); \ + int16x8_t sum37 = vaddq_s16(row3, row7); \ + int16x8_t sumodd = vaddq_s16(sum17, sum35); \ + dct_long_mul(p5o, sumodd, rot1_0); \ + dct_long_mac(p1o, p5o, sum17, rot1_1); \ + dct_long_mac(p2o, p5o, sum35, rot1_2); \ + dct_long_mul(p3o, sum37, rot2_0); \ + dct_long_mul(p4o, sum15, rot2_1); \ + dct_wadd(sump13o, p1o, p3o); \ + dct_wadd(sump24o, p2o, p4o); \ + dct_wadd(sump23o, p2o, p3o); \ + dct_wadd(sump14o, p1o, p4o); \ + dct_long_mac(x4, sump13o, row7, rot3_0); \ + dct_long_mac(x5, sump24o, row5, rot3_1); \ + dct_long_mac(x6, sump23o, row3, rot3_2); \ + dct_long_mac(x7, sump14o, row1, rot3_3); \ + dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ + dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ + dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ + dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ + } + + // load + row0 = vld1q_s16(data + 0*8); + row1 = vld1q_s16(data + 1*8); + row2 = vld1q_s16(data + 2*8); + row3 = vld1q_s16(data + 3*8); + row4 = vld1q_s16(data + 4*8); + row5 = vld1q_s16(data + 5*8); + row6 = vld1q_s16(data + 6*8); + row7 = vld1q_s16(data + 7*8); + + // add DC bias + row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + + // column pass + dct_pass(vrshrn_n_s32, 10); + + // 16bit 8x8 transpose + { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } +#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } + + // pass 1 + dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 + dct_trn16(row2, row3); + dct_trn16(row4, row5); + dct_trn16(row6, row7); + + // pass 2 + dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 + dct_trn32(row1, row3); + dct_trn32(row4, row6); + dct_trn32(row5, row7); + + // pass 3 + dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 + dct_trn64(row1, row5); + dct_trn64(row2, row6); + dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 + } + + // row pass + // vrshrn_n_s32 only supports shifts up to 16, we need + // 17. so do a non-rounding shift of 16 first then follow + // up with a rounding shift by 1. + dct_pass(vshrn_n_s32, 16); + + { + // pack and round + uint8x8_t p0 = vqrshrun_n_s16(row0, 1); + uint8x8_t p1 = vqrshrun_n_s16(row1, 1); + uint8x8_t p2 = vqrshrun_n_s16(row2, 1); + uint8x8_t p3 = vqrshrun_n_s16(row3, 1); + uint8x8_t p4 = vqrshrun_n_s16(row4, 1); + uint8x8_t p5 = vqrshrun_n_s16(row5, 1); + uint8x8_t p6 = vqrshrun_n_s16(row6, 1); + uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + + // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } +#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } + + // sadly can't use interleaved stores here since we only write + // 8 bytes to each scan line! + + // 8x8 8-bit transpose pass 1 + dct_trn8_8(p0, p1); + dct_trn8_8(p2, p3); + dct_trn8_8(p4, p5); + dct_trn8_8(p6, p7); + + // pass 2 + dct_trn8_16(p0, p2); + dct_trn8_16(p1, p3); + dct_trn8_16(p4, p6); + dct_trn8_16(p5, p7); + + // pass 3 + dct_trn8_32(p0, p4); + dct_trn8_32(p1, p5); + dct_trn8_32(p2, p6); + dct_trn8_32(p3, p7); + + // store + vst1_u8(out, p0); out += out_stride; + vst1_u8(out, p1); out += out_stride; + vst1_u8(out, p2); out += out_stride; + vst1_u8(out, p3); out += out_stride; + vst1_u8(out, p4); out += out_stride; + vst1_u8(out, p5); out += out_stride; + vst1_u8(out, p6); out += out_stride; + vst1_u8(out, p7); + +#undef dct_trn8_8 +#undef dct_trn8_16 +#undef dct_trn8_32 + } + +#undef dct_long_mul +#undef dct_long_mac +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_pass +} + +#endif // STBI_NEON + +#define STBI__MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static stbi_uc stbi__get_marker(stbi__jpeg *j) +{ + stbi_uc x; + if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } + x = stbi__get8(j->s); + if (x != 0xff) return STBI__MARKER_none; + while (x == 0xff) + x = stbi__get8(j->s); // consume repeated 0xff fill bytes + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, stbi__jpeg_reset the entropy decoder and +// the dc prediction +static void stbi__jpeg_reset(stbi__jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; + j->marker = STBI__MARKER_none; + j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; + j->eob_run = 0; + // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int stbi__parse_entropy_coded_data(stbi__jpeg *z) +{ + stbi__jpeg_reset(z); + if (!z->progressive) { + if (z->scan_n == 1) { + int i,j; + STBI_SIMD_ALIGN(short, data[64]); + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + STBI_SIMD_ALIGN(short, data[64]); + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } else { + if (z->scan_n == 1) { + int i,j; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + if (z->spec_start == 0) { + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } else { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) + return 0; + } + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x); + int y2 = (j*z->img_comp[n].v + y); + short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ + int i; + for (i=0; i < 64; ++i) + data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ + if (z->progressive) { + // dequantize and idct the data + int i,j,n; + for (n=0; n < z->s->img_n; ++n) { + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + } + } + } + } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ + int L; + switch (m) { + case STBI__MARKER_none: // no marker found + return stbi__err("expected marker","Corrupt JPEG"); + + case 0xDD: // DRI - specify restart interval + if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); + z->restart_interval = stbi__get16be(z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = stbi__get16be(z->s)-2; + while (L > 0) { + int q = stbi__get8(z->s); + int p = q >> 4, sixteen = (p != 0); + int t = q & 15,i; + if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); + if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + + for (i=0; i < 64; ++i) + z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); + L -= (sixteen ? 129 : 65); + } + return L==0; + + case 0xC4: // DHT - define huffman table + L = stbi__get16be(z->s)-2; + while (L > 0) { + stbi_uc *v; + int sizes[16],i,n=0; + int q = stbi__get8(z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = stbi__get8(z->s); + n += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < n; ++i) + v[i] = stbi__get8(z->s); + if (tc != 0) + stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); + L -= n; + } + return L==0; + } + + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + L = stbi__get16be(z->s); + if (L < 2) { + if (m == 0xFE) + return stbi__err("bad COM len","Corrupt JPEG"); + else + return stbi__err("bad APP len","Corrupt JPEG"); + } + L -= 2; + + if (m == 0xE0 && L >= 5) { // JFIF APP0 segment + static const unsigned char tag[5] = {'J','F','I','F','\0'}; + int ok = 1; + int i; + for (i=0; i < 5; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 5; + if (ok) + z->jfif = 1; + } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment + static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; + int ok = 1; + int i; + for (i=0; i < 6; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 6; + if (ok) { + stbi__get8(z->s); // version + stbi__get16be(z->s); // flags0 + stbi__get16be(z->s); // flags1 + z->app14_color_transform = stbi__get8(z->s); // color transform + L -= 6; + } + } + + stbi__skip(z->s, L); + return 1; + } + + return stbi__err("unknown marker","Corrupt JPEG"); +} + +// after we see SOS +static int stbi__process_scan_header(stbi__jpeg *z) +{ + int i; + int Ls = stbi__get16be(z->s); + z->scan_n = stbi__get8(z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = stbi__get8(z->s), which; + int q = stbi__get8(z->s); + for (which = 0; which < z->s->img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s->img_n) return 0; // no match + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + + { + int aa; + z->spec_start = stbi__get8(z->s); + z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 + aa = stbi__get8(z->s); + z->succ_high = (aa >> 4); + z->succ_low = (aa & 15); + if (z->progressive) { + if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) + return stbi__err("bad SOS", "Corrupt JPEG"); + } else { + if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); + if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); + z->spec_end = 63; + } + } + + return 1; +} + +static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) +{ + int i; + for (i=0; i < ncomp; ++i) { + if (z->img_comp[i].raw_data) { + STBI_FREE(z->img_comp[i].raw_data); + z->img_comp[i].raw_data = NULL; + z->img_comp[i].data = NULL; + } + if (z->img_comp[i].raw_coeff) { + STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } + } + return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ + stbi__context *s = z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG + p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + c = stbi__get8(s); + if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + + z->rgb = 0; + for (i=0; i < s->img_n; ++i) { + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; + z->img_comp[i].id = stbi__get8(s); + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; + q = stbi__get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); + z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); + } + + if (scan != STBI__SCAN_load) return 1; + + if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + // these sizes can't be more than 17 bits + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + // + // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) + // so these muls can't overflow with 32-bit ints (which we require) + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].coeff = 0; + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].linebuf = NULL; + z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); + if (z->img_comp[i].raw_data == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + // align blocks for idct using mmx/sse + z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + if (z->progressive) { + // w2, h2 are multiples of 8 (see above) + z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; + z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; + z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); + if (z->img_comp[i].raw_coeff == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); + } + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. SOF) +#define stbi__DNL(x) ((x) == 0xdc) +#define stbi__SOI(x) ((x) == 0xd8) +#define stbi__EOI(x) ((x) == 0xd9) +#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x) ((x) == 0xda) + +#define stbi__SOF_progressive(x) ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ + int m; + z->jfif = 0; + z->app14_color_transform = -1; // valid values are 0,1,2 + z->marker = STBI__MARKER_none; // initialize cached marker to empty + m = stbi__get_marker(z); + if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); + if (scan == STBI__SCAN_type) return 1; + m = stbi__get_marker(z); + while (!stbi__SOF(m)) { + if (!stbi__process_marker(z,m)) return 0; + m = stbi__get_marker(z); + while (m == STBI__MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); + m = stbi__get_marker(z); + } + } + z->progressive = stbi__SOF_progressive(m); + if (!stbi__process_frame_header(z, scan)) return 0; + return 1; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ + int m; + for (m = 0; m < 4; m++) { + j->img_comp[m].raw_data = NULL; + j->img_comp[m].raw_coeff = NULL; + } + j->restart_interval = 0; + if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; + m = stbi__get_marker(j); + while (!stbi__EOI(m)) { + if (stbi__SOS(m)) { + if (!stbi__process_scan_header(j)) return 0; + if (!stbi__parse_entropy_coded_data(j)) return 0; + if (j->marker == STBI__MARKER_none ) { + // handle 0s at the end of image data from IP Kamera 9060 + while (!stbi__at_eof(j->s)) { + int x = stbi__get8(j->s); + if (x == 255) { + j->marker = stbi__get8(j->s); + break; + } + } + // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 + } + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + } else { + if (!stbi__process_marker(j, m)) return 0; + } + m = stbi__get_marker(j); + } + if (j->progressive) + stbi__jpeg_finish(j); + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, + int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + stbi_uc *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = stbi__div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = stbi__div4(n+input[i-1]); + out[i*2+1] = stbi__div4(n+input[i+1]); + } + out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = stbi__div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i=0,t0,t1; + + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + // process groups of 8 pixels for as long as we can. + // note we can't handle the last pixel in a row in this loop + // because we need to handle the filter boundary conditions. + for (; i < ((w-1) & ~7); i += 8) { +#if defined(STBI_SSE2) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + __m128i zero = _mm_setzero_si128(); + __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); + __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); + __m128i farw = _mm_unpacklo_epi8(farb, zero); + __m128i nearw = _mm_unpacklo_epi8(nearb, zero); + __m128i diff = _mm_sub_epi16(farw, nearw); + __m128i nears = _mm_slli_epi16(nearw, 2); + __m128i curr = _mm_add_epi16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. + __m128i prv0 = _mm_slli_si128(curr, 2); + __m128i nxt0 = _mm_srli_si128(curr, 2); + __m128i prev = _mm_insert_epi16(prv0, t1, 0); + __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + __m128i bias = _mm_set1_epi16(8); + __m128i curs = _mm_slli_epi16(curr, 2); + __m128i prvd = _mm_sub_epi16(prev, curr); + __m128i nxtd = _mm_sub_epi16(next, curr); + __m128i curb = _mm_add_epi16(curs, bias); + __m128i even = _mm_add_epi16(prvd, curb); + __m128i odd = _mm_add_epi16(nxtd, curb); + + // interleave even and odd pixels, then undo scaling. + __m128i int0 = _mm_unpacklo_epi16(even, odd); + __m128i int1 = _mm_unpackhi_epi16(even, odd); + __m128i de0 = _mm_srli_epi16(int0, 4); + __m128i de1 = _mm_srli_epi16(int1, 4); + + // pack and write output + __m128i outv = _mm_packus_epi16(de0, de1); + _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + uint8x8_t farb = vld1_u8(in_far + i); + uint8x8_t nearb = vld1_u8(in_near + i); + int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); + int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); + int16x8_t curr = vaddq_s16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. + int16x8_t prv0 = vextq_s16(curr, curr, 7); + int16x8_t nxt0 = vextq_s16(curr, curr, 1); + int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); + int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + int16x8_t curs = vshlq_n_s16(curr, 2); + int16x8_t prvd = vsubq_s16(prev, curr); + int16x8_t nxtd = vsubq_s16(next, curr); + int16x8_t even = vaddq_s16(curs, prvd); + int16x8_t odd = vaddq_s16(curs, nxtd); + + // undo scaling and round, then store with even/odd phases interleaved + uint8x8x2_t o; + o.val[0] = vqrshrun_n_s16(even, 4); + o.val[1] = vqrshrun_n_s16(odd, 4); + vst2_u8(out + i*2, o); +#endif + + // "previous" value for next iter + t1 = 3*in_near[i+7] + in_far[i+7]; + } + + t0 = t1; + t1 = 3*in_near[i] + in_far[i]; + out[i*2] = stbi__div16(3*t1 + t0 + 8); + + for (++i; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + STBI_NOTUSED(in_far); + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ + int i = 0; + +#ifdef STBI_SSE2 + // step == 3 is pretty ugly on the final interleave, and i'm not convinced + // it's useful in practice (you wouldn't use it for textures, for example). + // so just accelerate step == 4 case. + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. + __m128i signflip = _mm_set1_epi8(-0x80); + __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); + __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); + __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); + __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); + __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); + __m128i xw = _mm_set1_epi16(255); // alpha channel + + for (; i+7 < count; i += 8) { + // load + __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); + __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); + __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); + __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 + __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + + // unpack to short (and left-shift cr, cb by 8) + __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); + __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); + __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + + // color transform + __m128i yws = _mm_srli_epi16(yw, 4); + __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); + __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); + __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); + __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); + __m128i rws = _mm_add_epi16(cr0, yws); + __m128i gwt = _mm_add_epi16(cb0, yws); + __m128i bws = _mm_add_epi16(yws, cb1); + __m128i gws = _mm_add_epi16(gwt, cr1); + + // descale + __m128i rw = _mm_srai_epi16(rws, 4); + __m128i bw = _mm_srai_epi16(bws, 4); + __m128i gw = _mm_srai_epi16(gws, 4); + + // back to byte, set up for transpose + __m128i brb = _mm_packus_epi16(rw, bw); + __m128i gxb = _mm_packus_epi16(gw, xw); + + // transpose to interleave channels + __m128i t0 = _mm_unpacklo_epi8(brb, gxb); + __m128i t1 = _mm_unpackhi_epi8(brb, gxb); + __m128i o0 = _mm_unpacklo_epi16(t0, t1); + __m128i o1 = _mm_unpackhi_epi16(t0, t1); + + // store + _mm_storeu_si128((__m128i *) (out + 0), o0); + _mm_storeu_si128((__m128i *) (out + 16), o1); + out += 32; + } + } +#endif + +#ifdef STBI_NEON + // in this version, step=3 support would be easy to add. but is there demand? + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. + uint8x8_t signflip = vdup_n_u8(0x80); + int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); + int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); + int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); + int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); + + for (; i+7 < count; i += 8) { + // load + uint8x8_t y_bytes = vld1_u8(y + i); + uint8x8_t cr_bytes = vld1_u8(pcr + i); + uint8x8_t cb_bytes = vld1_u8(pcb + i); + int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); + int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + + // expand to s16 + int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); + int16x8_t crw = vshll_n_s8(cr_biased, 7); + int16x8_t cbw = vshll_n_s8(cb_biased, 7); + + // color transform + int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); + int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); + int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); + int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); + int16x8_t rws = vaddq_s16(yws, cr0); + int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); + int16x8_t bws = vaddq_s16(yws, cb1); + + // undo scaling, round, convert to byte + uint8x8x4_t o; + o.val[0] = vqrshrun_n_s16(rws, 4); + o.val[1] = vqrshrun_n_s16(gws, 4); + o.val[2] = vqrshrun_n_s16(bws, 4); + o.val[3] = vdup_n_u8(255); + + // store, interleaving r/g/b/a + vst4_u8(out, o); + out += 8*4; + } + } +#endif + + for (; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} +#endif + +// set up the kernels +static void stbi__setup_jpeg(stbi__jpeg *j) +{ + j->idct_block_kernel = stbi__idct_block; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; + +#ifdef STBI_SSE2 + if (stbi__sse2_available()) { + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; + } +#endif + +#ifdef STBI_NEON + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +#endif +} + +// clean up the temporary component buffers +static void stbi__cleanup_jpeg(stbi__jpeg *j) +{ + stbi__free_jpeg_components(j, j->s->img_n, 0); +} + +typedef struct +{ + resample_row_func resample; + stbi_uc *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi__resample; + +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ + unsigned int t = x*y + 128; + return (stbi_uc) ((t + (t >>8)) >> 8); +} + +static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n, is_rgb; + z->s->img_n = 0; // make stbi__cleanup_jpeg safe + + // validate req_comp + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + + // load a jpeg image from whichever source, but leave in YCbCr format + if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; + + is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); + + if (z->s->img_n == 3 && n < 3 && !is_rgb) + decode_n = 1; + else + decode_n = z->s->img_n; + + // resample and color-convert + { + int k; + unsigned int i,j; + stbi_uc *output; + stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL }; + + stbi__resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); + if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s->img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; + else r->resample = stbi__resample_row_generic; + } + + // can't error after this so, this is safe + output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); + if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s->img_y; ++j) { + stbi_uc *out = output + n * z->s->img_x * j; + for (k=0; k < decode_n; ++k) { + stbi__resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? r->line1 : r->line0, + y_bot ? r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + stbi_uc *y = coutput[0]; + if (z->s->img_n == 3) { + if (is_rgb) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = y[i]; + out[1] = coutput[1][i]; + out[2] = coutput[2][i]; + out[3] = 255; + out += n; + } + } else { + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else if (z->s->img_n == 4) { + if (z->app14_color_transform == 0) { // CMYK + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(coutput[0][i], m); + out[1] = stbi__blinn_8x8(coutput[1][i], m); + out[2] = stbi__blinn_8x8(coutput[2][i], m); + out[3] = 255; + out += n; + } + } else if (z->app14_color_transform == 2) { // YCCK + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(255 - out[0], m); + out[1] = stbi__blinn_8x8(255 - out[1], m); + out[2] = stbi__blinn_8x8(255 - out[2], m); + out += n; + } + } else { // YCbCr + alpha? Ignore the fourth channel for now + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else + for (i=0; i < z->s->img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + if (is_rgb) { + if (n == 1) + for (i=0; i < z->s->img_x; ++i) + *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + else { + for (i=0; i < z->s->img_x; ++i, out += 2) { + out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + out[1] = 255; + } + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); + stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); + stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); + out[0] = stbi__compute_y(r, g, b); + out[1] = 255; + out += n; + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); + out[1] = 255; + out += n; + } + } else { + stbi_uc *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; } + } + } + } + stbi__cleanup_jpeg(z); + *out_x = z->s->img_x; + *out_y = z->s->img_y; + if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output + return output; + } +} + +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + unsigned char* result; + stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); + STBI_NOTUSED(ri); + j->s = s; + stbi__setup_jpeg(j); + result = load_jpeg_image(j, x,y,comp,req_comp); + STBI_FREE(j); + return result; +} + +static int stbi__jpeg_test(stbi__context *s) +{ + int r; + stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); + j->s = s; + stbi__setup_jpeg(j); + r = stbi__decode_jpeg_header(j, STBI__SCAN_type); + stbi__rewind(s); + STBI_FREE(j); + return r; +} + +static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) +{ + if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { + stbi__rewind( j->s ); + return 0; + } + if (x) *x = j->s->img_x; + if (y) *y = j->s->img_y; + if (comp) *comp = j->s->img_n >= 3 ? 3 : 1; + return 1; +} + +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) +{ + int result; + stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); + j->s = s; + result = stbi__jpeg_info_raw(j, x, y, comp); + STBI_FREE(j); + return result; +} +#endif + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +#ifndef STBI_NO_ZLIB + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables +#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + stbi__uint16 fast[1 << STBI__ZFAST_BITS]; + stbi__uint16 firstcode[16]; + int maxcode[17]; + stbi__uint16 firstsymbol[16]; + stbi_uc size[288]; + stbi__uint16 value[288]; +} stbi__zhuffman; + +stbi_inline static int stbi__bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +stbi_inline static int stbi__bit_reverse(int v, int bits) +{ + STBI_ASSERT(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 11 bits, bit reverse and shift away 5 + return stbi__bitreverse16(v) >> (16-bits); +} + +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 0, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + if (sizes[i] > (1 << i)) + return stbi__err("bad sizes", "Corrupt PNG"); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (stbi__uint16) code; + z->firstsymbol[i] = (stbi__uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); + z->size [c] = (stbi_uc ) s; + z->value[c] = (stbi__uint16) i; + if (s <= STBI__ZFAST_BITS) { + int j = stbi__bit_reverse(next_code[s],s); + while (j < (1 << STBI__ZFAST_BITS)) { + z->fast[j] = fastv; + j += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + stbi_uc *zbuffer, *zbuffer_end; + int num_bits; + stbi__uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + stbi__zhuffman z_length, z_distance; +} stbi__zbuf; + +stbi_inline static int stbi__zeof(stbi__zbuf *z) +{ + return (z->zbuffer >= z->zbuffer_end); +} + +stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) +{ + return stbi__zeof(z) ? 0 : *z->zbuffer++; +} + +static void stbi__fill_bits(stbi__zbuf *z) +{ + do { + if (z->code_buffer >= (1U << z->num_bits)) { + z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */ + return; + } + z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) stbi__fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s,k; + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = stbi__bit_reverse(a->code_buffer, 16); + for (s=STBI__ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s >= 16) return -1; // invalid code! + // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + if (b >= sizeof (z->size)) return -1; // some data was corrupt somewhere! + if (z->size[b] != s) return -1; // was originally an assert, but report failure instead. + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s; + if (a->num_bits < 16) { + if (stbi__zeof(a)) { + return -1; /* report error for unexpected end of data. */ + } + stbi__fill_bits(a); + } + b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; + if (b) { + s = b >> 9; + a->code_buffer >>= s; + a->num_bits -= s; + return b & 511; + } + return stbi__zhuffman_decode_slowpath(a, z); +} + +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes +{ + char *q; + unsigned int cur, limit, old_limit; + z->zout = zout; + if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); + cur = (unsigned int) (z->zout - z->zout_start); + limit = old_limit = (unsigned) (z->zout_end - z->zout_start); + if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory"); + while (cur + n > limit) { + if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory"); + limit *= 2; + } + q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); + STBI_NOTUSED(old_limit); + if (q == NULL) return stbi__err("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static const int stbi__zlength_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static const int stbi__zlength_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static const int stbi__zdist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int stbi__parse_huffman_block(stbi__zbuf *a) +{ + char *zout = a->zout; + for(;;) { + int z = stbi__zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes + if (zout >= a->zout_end) { + if (!stbi__zexpand(a, zout, 1)) return 0; + zout = a->zout; + } + *zout++ = (char) z; + } else { + stbi_uc *p; + int len,dist; + if (z == 256) { + a->zout = zout; + return 1; + } + z -= 257; + len = stbi__zlength_base[z]; + if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); + z = stbi__zhuffman_decode(a, &a->z_distance); + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); + dist = stbi__zdist_base[z]; + if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); + if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); + if (zout + len > a->zout_end) { + if (!stbi__zexpand(a, zout, len)) return 0; + zout = a->zout; + } + p = (stbi_uc *) (zout - dist); + if (dist == 1) { // run of one byte; common in images. + stbi_uc v = *p; + if (len) { do *zout++ = v; while (--len); } + } else { + if (len) { do *zout++ = *p++; while (--len); } + } + } + } +} + +static int stbi__compute_huffman_codes(stbi__zbuf *a) +{ + static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + stbi__zhuffman z_codelength; + stbi_uc lencodes[286+32+137];//padding for maximum single op + stbi_uc codelength_sizes[19]; + int i,n; + + int hlit = stbi__zreceive(a,5) + 257; + int hdist = stbi__zreceive(a,5) + 1; + int hclen = stbi__zreceive(a,4) + 4; + int ntot = hlit + hdist; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = stbi__zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; + } + if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < ntot) { + int c = stbi__zhuffman_decode(a, &z_codelength); + if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); + if (c < 16) + lencodes[n++] = (stbi_uc) c; + else { + stbi_uc fill = 0; + if (c == 16) { + c = stbi__zreceive(a,2)+3; + if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); + fill = lencodes[n-1]; + } else if (c == 17) { + c = stbi__zreceive(a,3)+3; + } else if (c == 18) { + c = stbi__zreceive(a,7)+11; + } else { + return stbi__err("bad codelengths", "Corrupt PNG"); + } + if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); + memset(lencodes+n, fill, c); + n += c; + } + } + if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); + if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; +} + +static int stbi__parse_uncompressed_block(stbi__zbuf *a) +{ + stbi_uc header[4]; + int len,nlen,k; + if (a->num_bits & 7) + stbi__zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check + a->code_buffer >>= 8; + a->num_bits -= 8; + } + if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG"); + // now fill header the normal way + while (k < 4) + header[k++] = stbi__zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!stbi__zexpand(a, a->zout, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int stbi__parse_zlib_header(stbi__zbuf *a) +{ + int cmf = stbi__zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = stbi__zget8(a); + if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... but who cares, we fully buffer output + return 1; +} + +static const stbi_uc stbi__zdefault_length[288] = +{ + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; + for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; + for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; + for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; + + for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; +} +*/ + +static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!stbi__parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = stbi__zreceive(a,1); + type = stbi__zreceive(a,2); + if (type == 0) { + if (!stbi__parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; + } else { + if (!stbi__compute_huffman_codes(a)) return 0; + } + if (!stbi__parse_huffman_block(a)) return 0; + } + } while (!final); + return 1; +} + +static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return stbi__parse_zlib(a, parse_header); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer+len; + if (stbi__do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} +#endif + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + +#ifndef STBI_NO_PNG +typedef struct +{ + stbi__uint32 length; + stbi__uint32 type; +} stbi__pngchunk; + +static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) +{ + stbi__pngchunk c; + c.length = stbi__get32be(s); + c.type = stbi__get32be(s); + return c; +} + +static int stbi__check_png_header(stbi__context *s) +{ + static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi__context *s; + stbi_uc *idata, *expanded, *out; + int depth; +} stbi__png; + + +enum { + STBI__F_none=0, + STBI__F_sub=1, + STBI__F_up=2, + STBI__F_avg=3, + STBI__F_paeth=4, + // synthetic filters used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static stbi_uc first_row_filter[5] = +{ + STBI__F_none, + STBI__F_sub, + STBI__F_none, + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static int stbi__paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + +static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ + int bytes = (depth == 16? 2 : 1); + stbi__context *s = a->s; + stbi__uint32 i,j,stride = x*out_n*bytes; + stbi__uint32 img_len, img_width_bytes; + int k; + int img_n = s->img_n; // copy it into a local for later + + int output_bytes = out_n*bytes; + int filter_bytes = img_n*bytes; + int width = x; + + STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); + a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into + if (!a->out) return stbi__err("outofmem", "Out of memory"); + + if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); + img_width_bytes = (((img_n * x * depth) + 7) >> 3); + img_len = (img_width_bytes + 1) * y; + + // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, + // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), + // so just check for raw_len < img_len always. + if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; + int filter = *raw++; + + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG"); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; + } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above + + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } + } + + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } + + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. + case STBI__F_none: memcpy(cur, raw, nk); break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; + } + #undef STBI__CASE + raw += nk; + } else { + STBI_ASSERT(img_n+1 == out_n); + #define STBI__CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ + for (k=0; k < filter_bytes; ++k) + switch (filter) { + STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; + STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; + STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; + STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; + STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; + STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; + STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; + } + #undef STBI__CASE + + // the loop above sets the high byte of the pixels' alpha, but for + // 16 bit png files we also need the low byte set. we'll do that here. + if (depth == 16) { + cur = a->out + stride*j; // start at the beginning of the row again + for (i=0; i < x; ++i,cur+=output_bytes) { + cur[filter_bytes+1] = 255; + } + } + } + } + + // we make a separate pass to expand bits to pixels; for performance, + // this could run two scanlines behind the above code, so it won't + // intefere with filtering but will still be in the cache. + if (depth < 8) { + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; + // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit + // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + + // note that the final byte might overshoot and write more data than desired. + // we can allocate enough data that this never writes out of memory, but it + // could also overwrite the next scanline. can it overwrite non-empty data + // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. + // so we need to explicitly clamp the final ones + + if (depth == 4) { + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); + } + if (k > 0) *cur++ = scale * ((*in >> 4) ); + } else if (depth == 2) { + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); + } + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); + } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); + } + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; + if (img_n == 1) { + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; + } + } else { + STBI_ASSERT(img_n == 3); + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; + } + } + } + } + } else if (depth == 16) { + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. + stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; + + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } + } + + return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ + int bytes = (depth == 16 ? 2 : 1); + int out_bytes = out_n * bytes; + stbi_uc *final; + int p; + if (!interlaced) + return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + + // de-interlacing + final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; + if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { + STBI_FREE(final); + return 0; + } + for (j=0; j < y; ++j) { + for (i=0; i < x; ++i) { + int out_y = j*yspc[p]+yorig[p]; + int out_x = i*xspc[p]+xorig[p]; + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); + } + } + STBI_FREE(a->out); + image_data += img_len; + image_data_len -= img_len; + } + } + a->out = final; + + return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi__uint16 *p = (stbi__uint16*) z->out; + + // compute color-based transparency, assuming we've + // already got 65535 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i = 0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 65535); + p += 2; + } + } else { + for (i = 0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) +{ + stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; + stbi_uc *p, *temp_out, *orig = a->out; + + p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); + if (p == NULL) return stbi__err("outofmem", "Out of memory"); + + // between here and free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + STBI_FREE(a->out); + a->out = temp_out; + + STBI_NOTUSED(len); + + return 1; +} + +static int stbi__unpremultiply_on_load = 0; +static int stbi__de_iphone_flag = 0; + +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; +} + +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag = flag_true_if_should_convert; +} + +static void stbi__de_iphone(stbi__png *z) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } + } else { + STBI_ASSERT(s->img_out_n == 4); + if (stbi__unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + stbi_uc a = p[3]; + stbi_uc t = p[0]; + if (a) { + stbi_uc half = a / 2; + p[0] = (p[2] * 255 + half) / a; + p[1] = (p[1] * 255 + half) / a; + p[2] = ( t * 255 + half) / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + +static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) +{ + stbi_uc palette[1024], pal_img_n=0; + stbi_uc has_trans=0, tc[3]={0}; + stbi__uint16 tc16[3]; + stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0, color=0, is_iphone=0; + stbi__context *s = z->s; + + z->expanded = NULL; + z->idata = NULL; + z->out = NULL; + + if (!stbi__check_png_header(s)) return 0; + + if (scan == STBI__SCAN_type) return 1; + + for (;;) { + stbi__pngchunk c = stbi__get_chunk_header(s); + switch (c.type) { + case STBI__PNG_TYPE('C','g','B','I'): + is_iphone = 1; + stbi__skip(s, c.length); + break; + case STBI__PNG_TYPE('I','H','D','R'): { + int comp,filter; + if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); + s->img_x = stbi__get32be(s); + s->img_y = stbi__get32be(s); + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); + color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); + comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); + filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); + interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + if (scan == STBI__SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. + s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case STBI__PNG_TYPE('P','L','T','E'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = stbi__get8(s); + palette[i*4+1] = stbi__get8(s); + palette[i*4+2] = stbi__get8(s); + palette[i*4+3] = 255; + } + break; + } + + case STBI__PNG_TYPE('t','R','N','S'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = stbi__get8(s); + } else { + if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); + if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); + has_trans = 1; + if (z->depth == 16) { + for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + } else { + for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + } + } + break; + } + + case STBI__PNG_TYPE('I','D','A','T'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); + if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } + if ((int)(ioff + c.length) < (int)ioff) return 0; + if (ioff + c.length > idata_limit) { + stbi__uint32 idata_limit_old = idata_limit; + stbi_uc *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + STBI_NOTUSED(idata_limit_old); + p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); + z->idata = p; + } + if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case STBI__PNG_TYPE('I','E','N','D'): { + stbi__uint32 raw_len, bpl; + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (scan != STBI__SCAN_load) return 1; + if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); + // initial guess for decoded data size to avoid unnecessary reallocs + bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component + raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); + if (z->expanded == NULL) return 0; // zlib should set error + STBI_FREE(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; + if (has_trans) { + if (z->depth == 16) { + if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; + } else { + if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; + } + } + if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) + stbi__de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } else if (has_trans) { + // non-paletted image with tRNS -> source image has (constant) alpha + ++s->img_n; + } + STBI_FREE(z->expanded); z->expanded = NULL; + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + return 1; + } + + default: + // if critical, fail + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX PNG chunk not known"; + invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); + invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); + invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); + invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); + #endif + return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); + } + stbi__skip(s, c.length); + break; + } + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + } +} + +static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) +{ + void *result=NULL; + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { + if (p->depth <= 8) + ri->bits_per_channel = 8; + else if (p->depth == 16) + ri->bits_per_channel = 16; + else + return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth"); + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s->img_out_n) { + if (ri->bits_per_channel == 8) + result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + else + result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + p->s->img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s->img_x; + *y = p->s->img_y; + if (n) *n = p->s->img_n; + } + STBI_FREE(p->out); p->out = NULL; + STBI_FREE(p->expanded); p->expanded = NULL; + STBI_FREE(p->idata); p->idata = NULL; + + return result; +} + +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi__png p; + p.s = s; + return stbi__do_png(&p, x,y,comp,req_comp, ri); +} + +static int stbi__png_test(stbi__context *s) +{ + int r; + r = stbi__check_png_header(s); + stbi__rewind(s); + return r; +} + +static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) +{ + if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { + stbi__rewind( p->s ); + return 0; + } + if (x) *x = p->s->img_x; + if (y) *y = p->s->img_y; + if (comp) *comp = p->s->img_n; + return 1; +} + +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__png p; + p.s = s; + return stbi__png_info_raw(&p, x, y, comp); +} + +static int stbi__png_is16(stbi__context *s) +{ + stbi__png p; + p.s = s; + if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) + return 0; + if (p.depth != 16) { + stbi__rewind(p.s); + return 0; + } + return 1; +} +#endif + +// Microsoft/Windows BMP image + +#ifndef STBI_NO_BMP +static int stbi__bmp_test_raw(stbi__context *s) +{ + int r; + int sz; + if (stbi__get8(s) != 'B') return 0; + if (stbi__get8(s) != 'M') return 0; + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + stbi__get32le(s); // discard data offset + sz = stbi__get32le(s); + r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); + return r; +} + +static int stbi__bmp_test(stbi__context *s) +{ + int r = stbi__bmp_test_raw(s); + stbi__rewind(s); + return r; +} + + +// returns 0..31 for the highest set bit +static int stbi__high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) { n += 16; z >>= 16; } + if (z >= 0x00100) { n += 8; z >>= 8; } + if (z >= 0x00010) { n += 4; z >>= 4; } + if (z >= 0x00004) { n += 2; z >>= 2; } + if (z >= 0x00002) { n += 1;/* >>= 1;*/ } + return n; +} + +static int stbi__bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = (a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +// extract an arbitrarily-aligned N-bit value (N=bits) +// from v, and then make it 8-bits long and fractionally +// extend it to full full range. +static int stbi__shiftsigned(unsigned int v, int shift, int bits) +{ + static unsigned int mul_table[9] = { + 0, + 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, + 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, + }; + static unsigned int shift_table[9] = { + 0, 0,0,1,0,2,4,6,0, + }; + if (shift < 0) + v <<= -shift; + else + v >>= shift; + STBI_ASSERT(v < 256); + v >>= (8-bits); + STBI_ASSERT(bits >= 0 && bits <= 8); + return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; +} + +typedef struct +{ + int bpp, offset, hsz; + unsigned int mr,mg,mb,ma, all_a; + int extra_read; +} stbi__bmp_data; + +static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) +{ + int hsz; + if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + info->offset = stbi__get32le(s); + info->hsz = hsz = stbi__get32le(s); + info->mr = info->mg = info->mb = info->ma = 0; + info->extra_read = 14; + + if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP"); + + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); + if (hsz == 12) { + s->img_x = stbi__get16le(s); + s->img_y = stbi__get16le(s); + } else { + s->img_x = stbi__get32le(s); + s->img_y = stbi__get32le(s); + } + if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); + info->bpp = stbi__get16le(s); + if (hsz != 12) { + int compress = stbi__get32le(s); + if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); + stbi__get32le(s); // discard sizeof + stbi__get32le(s); // discard hres + stbi__get32le(s); // discard vres + stbi__get32le(s); // discard colorsused + stbi__get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + } + if (info->bpp == 16 || info->bpp == 32) { + if (compress == 0) { + if (info->bpp == 32) { + info->mr = 0xffu << 16; + info->mg = 0xffu << 8; + info->mb = 0xffu << 0; + info->ma = 0xffu << 24; + info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 + } else { + info->mr = 31u << 10; + info->mg = 31u << 5; + info->mb = 31u << 0; + } + } else if (compress == 3) { + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->extra_read += 12; + // not documented, but generated by photoshop and handled by mspaint + if (info->mr == info->mg && info->mg == info->mb) { + // ?!?!? + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else { + int i; + if (hsz != 108 && hsz != 124) + return stbi__errpuc("bad BMP", "bad BMP"); + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->ma = stbi__get32le(s); + stbi__get32le(s); // discard color space + for (i=0; i < 12; ++i) + stbi__get32le(s); // discard color space parameters + if (hsz == 124) { + stbi__get32le(s); // discard rendering intent + stbi__get32le(s); // discard offset of profile data + stbi__get32le(s); // discard size of profile data + stbi__get32le(s); // discard reserved + } + } + } + return (void *) 1; +} + + +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + unsigned int mr=0,mg=0,mb=0,ma=0, all_a; + stbi_uc pal[256][4]; + int psize=0,i,j,width; + int flip_vertically, pad, target; + stbi__bmp_data info; + STBI_NOTUSED(ri); + + info.all_a = 255; + if (stbi__bmp_parse_header(s, &info) == NULL) + return NULL; // error code already set + + flip_vertically = ((int) s->img_y) > 0; + s->img_y = abs((int) s->img_y); + + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + mr = info.mr; + mg = info.mg; + mb = info.mb; + ma = info.ma; + all_a = info.all_a; + + if (info.hsz == 12) { + if (info.bpp < 24) + psize = (info.offset - info.extra_read - 24) / 3; + } else { + if (info.bpp < 16) + psize = (info.offset - info.extra_read - info.hsz) >> 2; + } + if (psize == 0) { + STBI_ASSERT(info.offset == s->callback_already_read + (int) (s->img_buffer - s->img_buffer_original)); + if (info.offset != s->callback_already_read + (s->img_buffer - s->buffer_start)) { + return stbi__errpuc("bad offset", "Corrupt BMP"); + } + } + + if (info.bpp == 24 && ma == 0xff000000) + s->img_n = 3; + else + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + + // sanity-check size + if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "Corrupt BMP"); + + out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + if (info.bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + if (info.hsz != 12) stbi__get8(s); + pal[i][3] = 255; + } + stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); + if (info.bpp == 1) width = (s->img_x + 7) >> 3; + else if (info.bpp == 4) width = (s->img_x + 1) >> 1; + else if (info.bpp == 8) width = s->img_x; + else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + if (info.bpp == 1) { + for (j=0; j < (int) s->img_y; ++j) { + int bit_offset = 7, v = stbi__get8(s); + for (i=0; i < (int) s->img_x; ++i) { + int color = (v>>bit_offset)&0x1; + out[z++] = pal[color][0]; + out[z++] = pal[color][1]; + out[z++] = pal[color][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + if((--bit_offset) < 0) { + bit_offset = 7; + v = stbi__get8(s); + } + } + stbi__skip(s, pad); + } + } else { + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=stbi__get8(s),v2=0; + if (info.bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (info.bpp == 8) ? stbi__get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + stbi__skip(s, pad); + } + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + stbi__skip(s, info.offset - info.extra_read - info.hsz); + if (info.bpp == 24) width = 3 * s->img_x; + else if (info.bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (info.bpp == 24) { + easy = 1; + } else if (info.bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + // right shift amt to put high bit in position #7 + rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); + gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); + bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); + ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); + if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + unsigned char a; + out[z+2] = stbi__get8(s); + out[z+1] = stbi__get8(s); + out[z+0] = stbi__get8(s); + z += 3; + a = (easy == 2 ? stbi__get8(s) : 255); + all_a |= a; + if (target == 4) out[z++] = a; + } + } else { + int bpp = info.bpp; + for (i=0; i < (int) s->img_x; ++i) { + stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); + unsigned int a; + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); + a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255); + all_a |= a; + if (target == 4) out[z++] = STBI__BYTECAST(a); + } + } + stbi__skip(s, pad); + } + } + + // if alpha channel is all 0s, replace with all 255s + if (target == 4 && all_a == 0) + for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) + out[i] = 255; + + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i]; p1[i] = p2[i]; p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + return out; +} +#endif + +// Targa Truevision - TGA +// by Jonathan Dummer +#ifndef STBI_NO_TGA +// returns STBI_rgb or whatever, 0 on error +static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) +{ + // only RGB or RGBA (incl. 16bit) or grey allowed + if (is_rgb16) *is_rgb16 = 0; + switch(bits_per_pixel) { + case 8: return STBI_grey; + case 16: if(is_grey) return STBI_grey_alpha; + // fallthrough + case 15: if(is_rgb16) *is_rgb16 = 1; + return STBI_rgb; + case 24: // fallthrough + case 32: return bits_per_pixel/8; + default: return 0; + } +} + +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) +{ + int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; + int sz, tga_colormap_type; + stbi__get8(s); // discard Offset + tga_colormap_type = stbi__get8(s); // colormap type + if( tga_colormap_type > 1 ) { + stbi__rewind(s); + return 0; // only RGB or indexed allowed + } + tga_image_type = stbi__get8(s); // image type + if ( tga_colormap_type == 1 ) { // colormapped (paletted) image + if (tga_image_type != 1 && tga_image_type != 9) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip image x and y origin + tga_colormap_bpp = sz; + } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE + if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { + stbi__rewind(s); + return 0; // only RGB or grey allowed, +/- RLE + } + stbi__skip(s,9); // skip colormap specification and image x/y origin + tga_colormap_bpp = 0; + } + tga_w = stbi__get16le(s); + if( tga_w < 1 ) { + stbi__rewind(s); + return 0; // test width + } + tga_h = stbi__get16le(s); + if( tga_h < 1 ) { + stbi__rewind(s); + return 0; // test height + } + tga_bits_per_pixel = stbi__get8(s); // bits per pixel + stbi__get8(s); // ignore alpha bits + if (tga_colormap_bpp != 0) { + if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { + // when using a colormap, tga_bits_per_pixel is the size of the indexes + // I don't think anything but 8 or 16bit indexes makes sense + stbi__rewind(s); + return 0; + } + tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); + } else { + tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); + } + if(!tga_comp) { + stbi__rewind(s); + return 0; + } + if (x) *x = tga_w; + if (y) *y = tga_h; + if (comp) *comp = tga_comp; + return 1; // seems to have passed everything +} + +static int stbi__tga_test(stbi__context *s) +{ + int res = 0; + int sz, tga_color_type; + stbi__get8(s); // discard Offset + tga_color_type = stbi__get8(s); // color type + if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed + sz = stbi__get8(s); // image type + if ( tga_color_type == 1 ) { // colormapped (paletted) image + if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + stbi__skip(s,4); // skip image x and y origin + } else { // "normal" image w/o colormap + if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE + stbi__skip(s,9); // skip colormap specification and image x/y origin + } + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height + sz = stbi__get8(s); // bits per pixel + if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + + res = 1; // if we got this far, everything's good and we can return 1 instead of 0 + +errorEnd: + stbi__rewind(s); + return res; +} + +// read 16bit value and convert to 24bit RGB +static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +{ + stbi__uint16 px = (stbi__uint16)stbi__get16le(s); + stbi__uint16 fiveBitMask = 31; + // we have 3 channels with 5bits each + int r = (px >> 10) & fiveBitMask; + int g = (px >> 5) & fiveBitMask; + int b = px & fiveBitMask; + // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later + out[0] = (stbi_uc)((r * 255)/31); + out[1] = (stbi_uc)((g * 255)/31); + out[2] = (stbi_uc)((b * 255)/31); + + // some people claim that the most significant bit might be used for alpha + // (possibly if an alpha-bit is set in the "image descriptor byte") + // but that only made 16bit test images completely translucent.. + // so let's treat all 15 and 16bit TGAs as RGB with no alpha. +} + +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + // read in the TGA header stuff + int tga_offset = stbi__get8(s); + int tga_indexed = stbi__get8(s); + int tga_image_type = stbi__get8(s); + int tga_is_RLE = 0; + int tga_palette_start = stbi__get16le(s); + int tga_palette_len = stbi__get16le(s); + int tga_palette_bits = stbi__get8(s); + int tga_x_origin = stbi__get16le(s); + int tga_y_origin = stbi__get16le(s); + int tga_width = stbi__get16le(s); + int tga_height = stbi__get16le(s); + int tga_bits_per_pixel = stbi__get8(s); + int tga_comp, tga_rgb16=0; + int tga_inverted = stbi__get8(s); + // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) + // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4] = {0}; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + STBI_NOTUSED(ri); + STBI_NOTUSED(tga_x_origin); // @TODO + STBI_NOTUSED(tga_y_origin); // @TODO + + if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + // do a tiny bit of precessing + if ( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // If I'm paletted, then I'll use the number of bits from the palette + if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); + else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); + + if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency + return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); + + // tga info + *x = tga_width; + *y = tga_height; + if (comp) *comp = tga_comp; + + if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) + return stbi__errpuc("too large", "Corrupt TGA"); + + tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); + if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); + + // skip to the data's starting position (offset usually = 0) + stbi__skip(s, tga_offset ); + + if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { + for (i=0; i < tga_height; ++i) { + int row = tga_inverted ? tga_height -i - 1 : i; + stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; + stbi__getn(s, tga_row, tga_width * tga_comp); + } + } else { + // do I need to load a palette? + if ( tga_indexed) + { + if (tga_palette_len == 0) { /* you have to have at least one entry! */ + STBI_FREE(tga_data); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + + // any data to skip? (offset usually = 0) + stbi__skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); + if (!tga_palette) { + STBI_FREE(tga_data); + return stbi__errpuc("outofmem", "Out of memory"); + } + if (tga_rgb16) { + stbi_uc *pal_entry = tga_palette; + STBI_ASSERT(tga_comp == STBI_rgb); + for (i=0; i < tga_palette_len; ++i) { + stbi__tga_read_rgb16(s, pal_entry); + pal_entry += tga_comp; + } + } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { + STBI_FREE(tga_data); + STBI_FREE(tga_palette); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + } + // load the data + for (i=0; i < tga_width * tga_height; ++i) + { + // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? + if ( tga_is_RLE ) + { + if ( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = stbi__get8(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if ( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if ( read_next_pixel ) + { + // load however much data we did have + if ( tga_indexed ) + { + // read in index, then perform the lookup + int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s); + if ( pal_idx >= tga_palette_len ) { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_comp; + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else if(tga_rgb16) { + STBI_ASSERT(tga_comp == STBI_rgb); + stbi__tga_read_rgb16(s, raw_data); + } else { + // read in the data raw + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = stbi__get8(s); + } + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + + // copy data + for (j = 0; j < tga_comp; ++j) + tga_data[i*tga_comp+j] = raw_data[j]; + + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * tga_comp; + int index2 = (tga_height - 1 - j) * tga_width * tga_comp; + for (i = tga_width * tga_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + STBI_FREE( tga_palette ); + } + } + + // swap RGB - if the source data was RGB16, it already is in the right order + if (tga_comp >= 3 && !tga_rgb16) + { + unsigned char* tga_pixel = tga_data; + for (i=0; i < tga_width * tga_height; ++i) + { + unsigned char temp = tga_pixel[0]; + tga_pixel[0] = tga_pixel[2]; + tga_pixel[2] = temp; + tga_pixel += tga_comp; + } + } + + // convert to target component count + if (req_comp && req_comp != tga_comp) + tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... [8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + STBI_NOTUSED(tga_palette_start); + // OK, done + return tga_data; +} +#endif + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s) +{ + int r = (stbi__get32be(s) == 0x38425053); + stbi__rewind(s); + return r; +} + +static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) +{ + int count, nleft, len; + + count = 0; + while ((nleft = pixelCount - count) > 0) { + len = stbi__get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + if (len > nleft) return 0; // corrupt data + count += len; + while (len) { + *p = stbi__get8(s); + p += 4; + len--; + } + } else if (len > 128) { + stbi_uc val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len = 257 - len; + if (len > nleft) return 0; // corrupt data + val = stbi__get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + + return 1; +} + +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + int pixelCount; + int channelCount, compression; + int channel, i; + int bitdepth; + int w,h; + stbi_uc *out; + STBI_NOTUSED(ri); + + // Check identifier + if (stbi__get32be(s) != 0x38425053) // "8BPS" + return stbi__errpuc("not PSD", "Corrupt PSD image"); + + // Check file type version. + if (stbi__get16be(s) != 1) + return stbi__errpuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + stbi__skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) + return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = stbi__get32be(s); + w = stbi__get32be(s); + + if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + // Make sure the depth is 8 bits. + bitdepth = stbi__get16be(s); + if (bitdepth != 8 && bitdepth != 16) + return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (stbi__get16be(s) != 3) + return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + stbi__skip(s,stbi__get32be(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + stbi__skip(s, stbi__get32be(s) ); + + // Skip the reserved data. + stbi__skip(s, stbi__get32be(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = stbi__get16be(s); + if (compression > 1) + return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + + // Check size + if (!stbi__mad3sizes_valid(4, w, h, 0)) + return stbi__errpuc("too large", "Corrupt PSD"); + + // Create the destination image. + + if (!compression && bitdepth == 16 && bpc == 16) { + out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); + ri->bits_per_channel = 16; + } else + out = (stbi_uc *) stbi__malloc(4 * w*h); + + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. + if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceded by a 2-byte data count for each row in the data, + // which we're going to just skip. + stbi__skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. + for (channel = 0; channel < 4; channel++) { + stbi_uc *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++, p += 4) + *p = (channel == 3 ? 255 : 0); + } else { + // Read the RLE data. + if (!stbi__psd_decode_rle(s, p, pixelCount)) { + STBI_FREE(out); + return stbi__errpuc("corrupt", "bad RLE data"); + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + if (channel >= channelCount) { + // Fill this channel with default data. + if (bitdepth == 16 && bpc == 16) { + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + stbi__uint16 val = channel == 3 ? 65535 : 0; + for (i = 0; i < pixelCount; i++, q += 4) + *q = val; + } else { + stbi_uc *p = out+channel; + stbi_uc val = channel == 3 ? 255 : 0; + for (i = 0; i < pixelCount; i++, p += 4) + *p = val; + } + } else { + if (ri->bits_per_channel == 16) { // output bpc + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + for (i = 0; i < pixelCount; i++, q += 4) + *q = (stbi__uint16) stbi__get16be(s); + } else { + stbi_uc *p = out+channel; + if (bitdepth == 16) { // input bpc + for (i = 0; i < pixelCount; i++, p += 4) + *p = (stbi_uc) (stbi__get16be(s) >> 8); + } else { + for (i = 0; i < pixelCount; i++, p += 4) + *p = stbi__get8(s); + } + } + } + } + } + + // remove weird white matte from PSD + if (channelCount >= 4) { + if (ri->bits_per_channel == 16) { + for (i=0; i < w*h; ++i) { + stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; + if (pixel[3] != 0 && pixel[3] != 65535) { + float a = pixel[3] / 65535.0f; + float ra = 1.0f / a; + float inv_a = 65535.0f * (1 - ra); + pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); + pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); + pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); + } + } + } else { + for (i=0; i < w*h; ++i) { + unsigned char *pixel = out + 4*i; + if (pixel[3] != 0 && pixel[3] != 255) { + float a = pixel[3] / 255.0f; + float ra = 1.0f / a; + float inv_a = 255.0f * (1 - ra); + pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); + pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); + pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + } + } + } + } + + // convert to desired output format + if (req_comp && req_comp != 4) { + if (ri->bits_per_channel == 16) + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); + else + out = stbi__convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + if (comp) *comp = 4; + *y = h; + *x = w; + + return out; +} +#endif + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +#ifndef STBI_NO_PIC +static int stbi__pic_is4(stbi__context *s,const char *str) +{ + int i; + for (i=0; i<4; ++i) + if (stbi__get8(s) != (stbi_uc)str[i]) + return 0; + + return 1; +} + +static int stbi__pic_test_core(stbi__context *s) +{ + int i; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) + return 0; + + for(i=0;i<84;++i) + stbi__get8(s); + + if (!stbi__pic_is4(s,"PICT")) + return 0; + + return 1; +} + +typedef struct +{ + stbi_uc size,type,channel; +} stbi__pic_packet; + +static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) +{ + int mask=0x80, i; + + for (i=0; i<4; ++i, mask>>=1) { + if (channel & mask) { + if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); + dest[i]=stbi__get8(s); + } + } + + return dest; +} + +static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ + int mask=0x80,i; + + for (i=0;i<4; ++i, mask>>=1) + if (channel&mask) + dest[i]=src[i]; +} + +static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) +{ + int act_comp=0,num_packets=0,y,chained; + stbi__pic_packet packets[10]; + + // this will (should...) cater for even some bizarre stuff like having data + // for the same channel in multiple packets. + do { + stbi__pic_packet *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return stbi__errpuc("bad format","too many packets"); + + packet = &packets[num_packets++]; + + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + + act_comp |= packet->channel; + + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)"); + if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp"); + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? + + for(y=0; ytype) { + default: + return stbi__errpuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;xchannel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=stbi__get8(s); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (stbi_uc) left; + + if (!stbi__readval(s,packet->channel,value)) return 0; + + for(i=0; ichannel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = stbi__get8(s), i; + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + + if (count==128) + count = stbi__get16be(s); + else + count -= 127; + if (count > left) + return stbi__errpuc("bad file","scanline overrun"); + + if (!stbi__readval(s,packet->channel,value)) + return 0; + + for(i=0;ichannel,dest,value); + } else { // Raw + ++count; + if (count>left) return stbi__errpuc("bad file","scanline overrun"); + + for(i=0;ichannel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } + } + + return result; +} + +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) +{ + stbi_uc *result; + int i, x,y, internal_comp; + STBI_NOTUSED(ri); + + if (!comp) comp = &internal_comp; + + for (i=0; i<92; ++i) + stbi__get8(s); + + x = stbi__get16be(s); + y = stbi__get16be(s); + + if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); + if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); + + stbi__get32be(s); //skip `ratio' + stbi__get16be(s); //skip `fields' + stbi__get16be(s); //skip `pad' + + // intermediate buffer is RGBA + result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); + memset(result, 0xff, x*y*4); + + if (!stbi__pic_load_core(s,x,y,comp, result)) { + STBI_FREE(result); + result=0; + } + *px = x; + *py = y; + if (req_comp == 0) req_comp = *comp; + result=stbi__convert_format(result,4,req_comp,x,y); + + return result; +} + +static int stbi__pic_test(stbi__context *s) +{ + int r = stbi__pic_test_core(s); + stbi__rewind(s); + return r; +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ + stbi__int16 prefix; + stbi_uc first; + stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *history; + int flags, bgindex, ratio, transparent, eflags; + stbi_uc pal[256][4]; + stbi_uc lpal[256][4]; + stbi__gif_lzw codes[8192]; + stbi_uc *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; + int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ + int sz; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; + sz = stbi__get8(s); + if (sz != '9' && sz != '7') return 0; + if (stbi__get8(s) != 'a') return 0; + return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ + int r = stbi__gif_test_raw(s); + stbi__rewind(s); + return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + pal[i][3] = transp == i ? 0 : 255; + } +} + +static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) +{ + stbi_uc version; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') + return stbi__err("not GIF", "Corrupt GIF"); + + version = stbi__get8(s); + if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); + if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); + + stbi__g_failure_reason = ""; + g->w = stbi__get16le(s); + g->h = stbi__get16le(s); + g->flags = stbi__get8(s); + g->bgindex = stbi__get8(s); + g->ratio = stbi__get8(s); + g->transparent = -1; + + if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments + + if (is_info) return 1; + + if (g->flags & 0x80) + stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + + return 1; +} + +static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); + if (!stbi__gif_header(s, g, comp, 1)) { + STBI_FREE(g); + stbi__rewind( s ); + return 0; + } + if (x) *x = g->w; + if (y) *y = g->h; + STBI_FREE(g); + return 1; +} + +static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) +{ + stbi_uc *p, *c; + int idx; + + // recurse to decode the prefixes, since the linked-list is backwards, + // and working backwards through an interleaved image would be nasty + if (g->codes[code].prefix >= 0) + stbi__out_gif_code(g, g->codes[code].prefix); + + if (g->cur_y >= g->max_y) return; + + idx = g->cur_x + g->cur_y; + p = &g->out[idx]; + g->history[idx / 4] = 1; + + c = &g->color_table[g->codes[code].suffix * 4]; + if (c[3] > 128) { // don't render transparent pixels; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } + g->cur_x += 4; + + if (g->cur_x >= g->max_x) { + g->cur_x = g->start_x; + g->cur_y += g->step; + + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << g->parse) * g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } + } +} + +static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) +{ + stbi_uc lzw_cs; + stbi__int32 len, init_code; + stbi__uint32 first; + stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; + stbi__gif_lzw *p; + + lzw_cs = stbi__get8(s); + if (lzw_cs > 12) return NULL; + clear = 1 << lzw_cs; + first = 1; + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + bits = 0; + valid_bits = 0; + for (init_code = 0; init_code < clear; init_code++) { + g->codes[init_code].prefix = -1; + g->codes[init_code].first = (stbi_uc) init_code; + g->codes[init_code].suffix = (stbi_uc) init_code; + } + + // support no starting clear code + avail = clear+2; + oldcode = -1; + + len = 0; + for(;;) { + if (valid_bits < codesize) { + if (len == 0) { + len = stbi__get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (stbi__int32) stbi__get8(s) << valid_bits; + valid_bits += 8; + } else { + stbi__int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? + if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + stbi__skip(s, len); + while ((len = stbi__get8(s)) > 0) + stbi__skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) { + return stbi__errpuc("no clear code", "Corrupt GIF"); + } + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 8192) { + return stbi__errpuc("too many codes", "Corrupt GIF"); + } + + p->prefix = (stbi__int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? p->first : g->codes[code].first; + } else if (code == avail) + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + + stbi__out_gif_code(g, (stbi__uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + } + } + } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) +{ + int dispose; + int first_frame; + int pi; + int pcount; + STBI_NOTUSED(req_comp); + + // on first frame, any non-written pixels get the background colour (non-transparent) + first_frame = 0; + if (g->out == 0) { + if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header + if (!stbi__mad3sizes_valid(4, g->w, g->h, 0)) + return stbi__errpuc("too large", "GIF image is too large"); + pcount = g->w * g->h; + g->out = (stbi_uc *) stbi__malloc(4 * pcount); + g->background = (stbi_uc *) stbi__malloc(4 * pcount); + g->history = (stbi_uc *) stbi__malloc(pcount); + if (!g->out || !g->background || !g->history) + return stbi__errpuc("outofmem", "Out of memory"); + + // image is treated as "transparent" at the start - ie, nothing overwrites the current background; + // background colour is only used for pixels that are not rendered first frame, after that "background" + // color refers to the color that was there the previous frame. + memset(g->out, 0x00, 4 * pcount); + memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent) + memset(g->history, 0x00, pcount); // pixels that were affected previous frame + first_frame = 1; + } else { + // second frame - how do we dispose of the previous one? + dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; + + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } + + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } + } + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); + } + + // clear my history; + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + + for (;;) { + int tag = stbi__get8(s); + switch (tag) { + case 0x2C: /* Image Descriptor */ + { + stbi__int32 x, y, w, h; + stbi_uc *o; + + x = stbi__get16le(s); + y = stbi__get16le(s); + w = stbi__get16le(s); + h = stbi__get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + // if the width of the specified rectangle is 0, that means + // we may not see *any* pixels or the image is malformed; + // to make sure this is caught, move the current y down to + // max_y (which is what out_gif_code checks). + if (w == 0) + g->cur_y = g->max_y; + + g->lflags = stbi__get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1); + g->color_table = (stbi_uc *) g->lpal; + } else if (g->flags & 0x80) { + g->color_table = (stbi_uc *) g->pal; + } else + return stbi__errpuc("missing color table", "Corrupt GIF"); + + o = stbi__process_gif_raster(s, g); + if (!o) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } + + return o; + } + + case 0x21: // Comment Extension. + { + int len; + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. + len = stbi__get8(s); + if (len == 4) { + g->eflags = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. + + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } + } +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + int out_size = 0; + int delays_size = 0; + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride ); + if (NULL == tmp) { + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + return stbi__errpuc("outofmem", "Out of memory"); + } + else { + out = (stbi_uc*) tmp; + out_size = layers * stride; + } + + if (delays) { + *delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers ); + delays_size = layers * sizeof(int); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + out_size = layers * stride; + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + delays_size = layers * sizeof(int); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a gif type."); + } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *u = 0; + stbi__gif g; + memset(&g, 0, sizeof(g)); + STBI_NOTUSED(ri); + + u = stbi__gif_load_next(s, &g, comp, req_comp, 0); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + + // moved conversion to after successful load so that the same + // can be done for multiple frames. + if (req_comp && req_comp != 4) + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + } else if (g.out) { + // if there was an error and we allocated an image buffer, free it! + STBI_FREE(g.out); + } + + // free buffers needed for multiple frame loading; + STBI_FREE(g.history); + STBI_FREE(g.background); + + return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ + return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ + int i; + for (i=0; signature[i]; ++i) + if (stbi__get8(s) != signature[i]) + return 0; + stbi__rewind(s); + return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ + int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); + stbi__rewind(s); + if(!r) { + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); + } + return r; +} + +#define STBI__HDR_BUFLEN 1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) stbi__get8(z); + + while (!stbi__at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == STBI__HDR_BUFLEN-1) { + // flush to end of line + while (!stbi__at_eof(z) && stbi__get8(z) != '\n') + ; + break; + } + c = (char) stbi__get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + const char *headerToken; + STBI_NOTUSED(ri); + + // Check identifier + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) + return stbi__errpf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = (int) strtol(token, NULL, 10); + + if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); + if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); + + *x = width; + *y = height; + + if (comp) *comp = 3; + if (req_comp == 0) req_comp = 3; + + if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) + return stbi__errpf("too large", "HDR image is too large"); + + // Read data + hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); + if (!hdr_data) + return stbi__errpf("outofmem", "Out of memory"); + + // Load image data + // image data is stored as some number of sca + if ( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + stbi__getn(s, rgbe, 4); + stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = stbi__get8(s); + c2 = stbi__get8(s); + len = stbi__get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4]; + rgbe[0] = (stbi_uc) c1; + rgbe[1] = (stbi_uc) c2; + rgbe[2] = (stbi_uc) len; + rgbe[3] = (stbi_uc) stbi__get8(s); + stbi__hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + STBI_FREE(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= stbi__get8(s); + if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) { + scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0); + if (!scanline) { + STBI_FREE(hdr_data); + return stbi__errpf("outofmem", "Out of memory"); + } + } + + for (k = 0; k < 4; ++k) { + int nleft; + i = 0; + while ((nleft = width - i) > 0) { + count = stbi__get8(s); + if (count > 128) { + // Run + value = stbi__get8(s); + count -= 128; + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = stbi__get8(s); + } + } + } + for (i=0; i < width; ++i) + stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + if (scanline) + STBI_FREE(scanline); + } + + return hdr_data; +} + +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int dummy; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (stbi__hdr_test(s) == 0) { + stbi__rewind( s ); + return 0; + } + + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) { + stbi__rewind( s ); + return 0; + } + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *y = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *x = (int) strtol(token, NULL, 10); + *comp = 3; + return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ + void *p; + stbi__bmp_data info; + + info.all_a = 255; + p = stbi__bmp_parse_header(s, &info); + stbi__rewind( s ); + if (p == NULL) + return 0; + if (x) *x = s->img_x; + if (y) *y = s->img_y; + if (comp) { + if (info.bpp == 24 && info.ma == 0xff000000) + *comp = 3; + else + *comp = info.ma ? 4 : 3; + } + return 1; +} +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) +{ + int channelCount, dummy, depth; + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + *y = stbi__get32be(s); + *x = stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 8 && depth != 16) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 3) { + stbi__rewind( s ); + return 0; + } + *comp = 4; + return 1; +} + +static int stbi__psd_is16(stbi__context *s) +{ + int channelCount, depth; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + (void) stbi__get32be(s); + (void) stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 16) { + stbi__rewind( s ); + return 0; + } + return 1; +} +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) +{ + int act_comp=0,num_packets=0,chained,dummy; + stbi__pic_packet packets[10]; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { + stbi__rewind(s); + return 0; + } + + stbi__skip(s, 88); + + *x = stbi__get16be(s); + *y = stbi__get16be(s); + if (stbi__at_eof(s)) { + stbi__rewind( s); + return 0; + } + if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { + stbi__rewind( s ); + return 0; + } + + stbi__skip(s, 8); + + do { + stbi__pic_packet *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); + + return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) +// Does not support 16-bit-per-channel + +#ifndef STBI_NO_PNM + +static int stbi__pnm_test(stbi__context *s) +{ + char p, t; + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind( s ); + return 0; + } + return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + STBI_NOTUSED(ri); + + if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n)) + return 0; + + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + + if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "PNM too large"); + + out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + stbi__getn(s, out, s->img_n * s->img_x * s->img_y); + + if (req_comp && req_comp != s->img_n) { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + return out; +} + +static int stbi__pnm_isspace(char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ + for (;;) { + while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) + *c = (char) stbi__get8(s); + + if (stbi__at_eof(s) || *c != '#') + break; + + while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) + *c = (char) stbi__get8(s); + } +} + +static int stbi__pnm_isdigit(char c) +{ + return c >= '0' && c <= '9'; +} + +static int stbi__pnm_getinteger(stbi__context *s, char *c) +{ + int value = 0; + + while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { + value = value*10 + (*c - '0'); + *c = (char) stbi__get8(s); + } + + return value; +} + +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ + int maxv, dummy; + char c, p, t; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + stbi__rewind(s); + + // Get identifier + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind(s); + return 0; + } + + *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm + + c = (char) stbi__get8(s); + stbi__pnm_skip_whitespace(s, &c); + + *x = stbi__pnm_getinteger(s, &c); // read width + stbi__pnm_skip_whitespace(s, &c); + + *y = stbi__pnm_getinteger(s, &c); // read height + stbi__pnm_skip_whitespace(s, &c); + + maxv = stbi__pnm_getinteger(s, &c); // read max value + + if (maxv > 255) + return stbi__err("max value > 255", "PPM image not 8-bit"); + else + return 1; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ + #ifndef STBI_NO_JPEG + if (stbi__jpeg_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNG + if (stbi__png_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_GIF + if (stbi__gif_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_BMP + if (stbi__bmp_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PIC + if (stbi__pic_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_info(s, x, y, comp)) return 1; + #endif + + // test tga last because it's a crappy test! + #ifndef STBI_NO_TGA + if (stbi__tga_info(s, x, y, comp)) + return 1; + #endif + return stbi__err("unknown image type", "Image not of any known type, or corrupt"); +} + +static int stbi__is_16_main(stbi__context *s) +{ + #ifndef STBI_NO_PNG + if (stbi__png_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_is16(s)) return 1; + #endif + + return 0; +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; +} + +STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__info_main(&s,x,y,comp); + fseek(f,pos,SEEK_SET); + return r; +} + +STBIDEF int stbi_is_16_bit(char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_is_16_bit_from_file(f); + fclose(f); + return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__is_16_main(&s); + fseek(f,pos,SEEK_SET); + return r; +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__is_16_main(&s); +} + +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* + revision history: + 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) + fix to TGA optimization when req_comp != number of components in TGA; + fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) + add support for BMP version 5 (more ignored fields) + 1.38 (2014-06-06) + suppress MSVC warnings on integer casts truncating values + fix accidental rename of 'skip' field of I/O + 1.37 (2014-06-04) + remove duplicate typedef + 1.36 (2014-06-03) + convert to header file single-file library + if de-iphone isn't set, load iphone images color-swapped instead of returning NULL + 1.35 (2014-05-27) + various warnings + fix broken STBI_SIMD path + fix bug where stbi_load_from_file no longer left file pointer in correct place + fix broken non-easy path for 32-bit BMP (possibly never used) + TGA optimization by Arseny Kapoulkine + 1.34 (unknown) + use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case + 1.33 (2011-07-14) + make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements + 1.32 (2011-07-13) + support for "info" function for all supported filetypes (SpartanJ) + 1.31 (2011-06-20) + a few more leak fixes, bug in PNG handling (SpartanJ) + 1.30 (2011-06-11) + added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) + removed deprecated format-specific test/load functions + removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway + error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) + fix inefficiency in decoding 32-bit BMP (David Woo) + 1.29 (2010-08-16) + various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) + fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-stbi_uc to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva) + 1.21 fix use of 'stbi_uc' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 (2008-08-02) + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - stbi__convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ + diff --git a/src/VK/offscreen/include/tinyexr.h b/src/VK/offscreen/include/tinyexr.h new file mode 100644 index 0000000..da5aa4c --- /dev/null +++ b/src/VK/offscreen/include/tinyexr.h @@ -0,0 +1,13586 @@ +/* +Copyright (c) 2014 - 2019, Syoyo Fujita and many contributors. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Syoyo Fujita nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +// TinyEXR contains some OpenEXR code, which is licensed under ------------ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas +// Digital Ltd. LLC +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Industrial Light & Magic nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// End of OpenEXR license ------------------------------------------------- + +#ifndef TINYEXR_H_ +#define TINYEXR_H_ + +// +// +// Do this: +// #define TINYEXR_IMPLEMENTATION +// before you include this file in *one* C or C++ file to create the +// implementation. +// +// // i.e. it should look like this: +// #include ... +// #include ... +// #include ... +// #define TINYEXR_IMPLEMENTATION +// #include "tinyexr.h" +// +// + +#include // for size_t +#include // guess stdint.h is available(C99) + +#ifdef __cplusplus +extern "C" { +#endif + +// Use embedded miniz or not to decode ZIP format pixel. Linking with zlib +// required if this flas is 0. +#ifndef TINYEXR_USE_MINIZ +#define TINYEXR_USE_MINIZ (1) +#endif + +// Disable PIZ comporession when applying cpplint. +#ifndef TINYEXR_USE_PIZ +#define TINYEXR_USE_PIZ (1) +#endif + +#ifndef TINYEXR_USE_ZFP +#define TINYEXR_USE_ZFP (0) // TinyEXR extension. +// http://computation.llnl.gov/projects/floating-point-compression +#endif + +#ifndef TINYEXR_USE_THREAD +#define TINYEXR_USE_THREAD (0) // No threaded loading. +// http://computation.llnl.gov/projects/floating-point-compression +#endif + +#ifndef TINYEXR_USE_OPENMP +#ifdef _OPENMP +#define TINYEXR_USE_OPENMP (1) +#else +#define TINYEXR_USE_OPENMP (0) +#endif +#endif + +#define TINYEXR_SUCCESS (0) +#define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1) +#define TINYEXR_ERROR_INVALID_EXR_VERSION (-2) +#define TINYEXR_ERROR_INVALID_ARGUMENT (-3) +#define TINYEXR_ERROR_INVALID_DATA (-4) +#define TINYEXR_ERROR_INVALID_FILE (-5) +#define TINYEXR_ERROR_INVALID_PARAMETER (-6) +#define TINYEXR_ERROR_CANT_OPEN_FILE (-7) +#define TINYEXR_ERROR_UNSUPPORTED_FORMAT (-8) +#define TINYEXR_ERROR_INVALID_HEADER (-9) +#define TINYEXR_ERROR_UNSUPPORTED_FEATURE (-10) +#define TINYEXR_ERROR_CANT_WRITE_FILE (-11) +#define TINYEXR_ERROR_SERIALZATION_FAILED (-12) +#define TINYEXR_ERROR_LAYER_NOT_FOUND (-13) + +// @note { OpenEXR file format: http://www.openexr.com/openexrfilelayout.pdf } + +// pixel type: possible values are: UINT = 0 HALF = 1 FLOAT = 2 +#define TINYEXR_PIXELTYPE_UINT (0) +#define TINYEXR_PIXELTYPE_HALF (1) +#define TINYEXR_PIXELTYPE_FLOAT (2) + +#define TINYEXR_MAX_HEADER_ATTRIBUTES (1024) +#define TINYEXR_MAX_CUSTOM_ATTRIBUTES (128) + +#define TINYEXR_COMPRESSIONTYPE_NONE (0) +#define TINYEXR_COMPRESSIONTYPE_RLE (1) +#define TINYEXR_COMPRESSIONTYPE_ZIPS (2) +#define TINYEXR_COMPRESSIONTYPE_ZIP (3) +#define TINYEXR_COMPRESSIONTYPE_PIZ (4) +#define TINYEXR_COMPRESSIONTYPE_ZFP (128) // TinyEXR extension + +#define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0) +#define TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION (1) +#define TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY (2) + +#define TINYEXR_TILE_ONE_LEVEL (0) +#define TINYEXR_TILE_MIPMAP_LEVELS (1) +#define TINYEXR_TILE_RIPMAP_LEVELS (2) + +#define TINYEXR_TILE_ROUND_DOWN (0) +#define TINYEXR_TILE_ROUND_UP (1) + +typedef struct _EXRVersion { + int version; // this must be 2 + int tiled; // tile format image + int long_name; // long name attribute + int non_image; // deep image(EXR 2.0) + int multipart; // multi-part(EXR 2.0) +} EXRVersion; + +typedef struct _EXRAttribute { + char name[256]; // name and type are up to 255 chars long. + char type[256]; + unsigned char *value; // uint8_t* + int size; + int pad0; +} EXRAttribute; + +typedef struct _EXRChannelInfo { + char name[256]; // less than 255 bytes long + int pixel_type; + int x_sampling; + int y_sampling; + unsigned char p_linear; + unsigned char pad[3]; +} EXRChannelInfo; + +typedef struct _EXRTile { + int offset_x; + int offset_y; + int level_x; + int level_y; + + int width; // actual width in a tile. + int height; // actual height int a tile. + + unsigned char **images; // image[channels][pixels] +} EXRTile; + +typedef struct _EXRHeader { + float pixel_aspect_ratio; + int line_order; + int data_window[4]; + int display_window[4]; + float screen_window_center[2]; + float screen_window_width; + + int chunk_count; + + // Properties for tiled format(`tiledesc`). + int tiled; + int tile_size_x; + int tile_size_y; + int tile_level_mode; + int tile_rounding_mode; + + int long_name; + int non_image; + int multipart; + unsigned int header_len; + + // Custom attributes(exludes required attributes(e.g. `channels`, + // `compression`, etc) + int num_custom_attributes; + EXRAttribute *custom_attributes; // array of EXRAttribute. size = + // `num_custom_attributes`. + + EXRChannelInfo *channels; // [num_channels] + + int *pixel_types; // Loaded pixel type(TINYEXR_PIXELTYPE_*) of `images` for + // each channel. This is overwritten with `requested_pixel_types` when + // loading. + int num_channels; + + int compression_type; // compression type(TINYEXR_COMPRESSIONTYPE_*) + int *requested_pixel_types; // Filled initially by + // ParseEXRHeaderFrom(Meomory|File), then users + // can edit it(only valid for HALF pixel type + // channel) + +} EXRHeader; + +typedef struct _EXRMultiPartHeader { + int num_headers; + EXRHeader *headers; + +} EXRMultiPartHeader; + +typedef struct _EXRImage { + EXRTile *tiles; // Tiled pixel data. The application must reconstruct image + // from tiles manually. NULL if scanline format. + unsigned char **images; // image[channels][pixels]. NULL if tiled format. + + int width; + int height; + int num_channels; + + // Properties for tile format. + int num_tiles; + +} EXRImage; + +typedef struct _EXRMultiPartImage { + int num_images; + EXRImage *images; + +} EXRMultiPartImage; + +typedef struct _DeepImage { + const char **channel_names; + float ***image; // image[channels][scanlines][samples] + int **offset_table; // offset_table[scanline][offsets] + int num_channels; + int width; + int height; + int pad0; +} DeepImage; + +// @deprecated { For backward compatibility. Not recommended to use. } +// Loads single-frame OpenEXR image. Assume EXR image contains A(single channel +// alpha) or RGB(A) channels. +// Application must free image data as returned by `out_rgba` +// Result image format is: float x RGBA x width x hight +// Returns negative value and may set error string in `err` when there's an +// error +extern int LoadEXR(float **out_rgba, int *width, int *height, + const char *filename, const char **err); + +// Loads single-frame OpenEXR image by specifing layer name. Assume EXR image contains A(single channel +// alpha) or RGB(A) channels. +// Application must free image data as returned by `out_rgba` +// Result image format is: float x RGBA x width x hight +// Returns negative value and may set error string in `err` when there's an +// error +// When the specified layer name is not found in the EXR file, the function will return `TINYEXR_ERROR_LAYER_NOT_FOUND`. +extern int LoadEXRWithLayer(float **out_rgba, int *width, int *height, + const char *filename, const char *layer_name, const char **err); + +// +// Get layer infos from EXR file. +// +// @param[out] layer_names List of layer names. Application must free memory after using this. +// @param[out] num_layers The number of layers +// @param[out] err Error string(wll be filled when the function returns error code). Free it using FreeEXRErrorMessage after using this value. +// +// @return TINYEXR_SUCCEES upon success. +// +extern int EXRLayers(const char *filename, const char **layer_names[], int *num_layers, const char **err); + +// @deprecated { to be removed. } +// Simple wrapper API for ParseEXRHeaderFromFile. +// checking given file is a EXR file(by just look up header) +// @return TINYEXR_SUCCEES for EXR image, TINYEXR_ERROR_INVALID_HEADER for +// others +extern int IsEXR(const char *filename); + +// @deprecated { to be removed. } +// Saves single-frame OpenEXR image. Assume EXR image contains RGB(A) channels. +// components must be 1(Grayscale), 3(RGB) or 4(RGBA). +// Input image format is: `float x width x height`, or `float x RGB(A) x width x +// hight` +// Save image as fp16(HALF) format when `save_as_fp16` is positive non-zero +// value. +// Save image as fp32(FLOAT) format when `save_as_fp16` is 0. +// Use ZIP compression by default. +// Returns negative value and may set error string in `err` when there's an +// error +extern int SaveEXR(const float *data, const int width, const int height, + const int components, const int save_as_fp16, + const char *filename, const char **err); + +// Initialize EXRHeader struct +extern void InitEXRHeader(EXRHeader *exr_header); + +// Initialize EXRImage struct +extern void InitEXRImage(EXRImage *exr_image); + +// Free's internal data of EXRHeader struct +extern int FreeEXRHeader(EXRHeader *exr_header); + +// Free's internal data of EXRImage struct +extern int FreeEXRImage(EXRImage *exr_image); + +// Free's error message +extern void FreeEXRErrorMessage(const char *msg); + +// Parse EXR version header of a file. +extern int ParseEXRVersionFromFile(EXRVersion *version, const char *filename); + +// Parse EXR version header from memory-mapped EXR data. +extern int ParseEXRVersionFromMemory(EXRVersion *version, + const unsigned char *memory, size_t size); + +// Parse single-part OpenEXR header from a file and initialize `EXRHeader`. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRHeaderFromFile(EXRHeader *header, const EXRVersion *version, + const char *filename, const char **err); + +// Parse single-part OpenEXR header from a memory and initialize `EXRHeader`. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRHeaderFromMemory(EXRHeader *header, + const EXRVersion *version, + const unsigned char *memory, size_t size, + const char **err); + +// Parse multi-part OpenEXR headers from a file and initialize `EXRHeader*` +// array. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRMultipartHeaderFromFile(EXRHeader ***headers, + int *num_headers, + const EXRVersion *version, + const char *filename, + const char **err); + +// Parse multi-part OpenEXR headers from a memory and initialize `EXRHeader*` +// array +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int ParseEXRMultipartHeaderFromMemory(EXRHeader ***headers, + int *num_headers, + const EXRVersion *version, + const unsigned char *memory, + size_t size, const char **err); + +// Loads single-part OpenEXR image from a file. +// Application must setup `ParseEXRHeaderFromFile` before calling this function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRImageFromFile(EXRImage *image, const EXRHeader *header, + const char *filename, const char **err); + +// Loads single-part OpenEXR image from a memory. +// Application must setup `EXRHeader` with +// `ParseEXRHeaderFromMemory` before calling this function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRImageFromMemory(EXRImage *image, const EXRHeader *header, + const unsigned char *memory, + const size_t size, const char **err); + +// Loads multi-part OpenEXR image from a file. +// Application must setup `ParseEXRMultipartHeaderFromFile` before calling this +// function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRMultipartImageFromFile(EXRImage *images, + const EXRHeader **headers, + unsigned int num_parts, + const char *filename, + const char **err); + +// Loads multi-part OpenEXR image from a memory. +// Application must setup `EXRHeader*` array with +// `ParseEXRMultipartHeaderFromMemory` before calling this function. +// Application can free EXRImage using `FreeEXRImage` +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRMultipartImageFromMemory(EXRImage *images, + const EXRHeader **headers, + unsigned int num_parts, + const unsigned char *memory, + const size_t size, const char **err); + +// Saves multi-channel, single-frame OpenEXR image to a file. +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int SaveEXRImageToFile(const EXRImage *image, + const EXRHeader *exr_header, const char *filename, + const char **err); + +// Saves multi-channel, single-frame OpenEXR image to a memory. +// Image is compressed using EXRImage.compression value. +// Return the number of bytes if success. +// Return zero and will set error string in `err` when there's an +// error. +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern size_t SaveEXRImageToMemory(const EXRImage *image, + const EXRHeader *exr_header, + unsigned char **memory, const char **err); + +// Loads single-frame OpenEXR deep image. +// Application must free memory of variables in DeepImage(image, offset_table) +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadDeepEXR(DeepImage *out_image, const char *filename, + const char **err); + +// NOT YET IMPLEMENTED: +// Saves single-frame OpenEXR deep image. +// Returns negative value and may set error string in `err` when there's an +// error +// extern int SaveDeepEXR(const DeepImage *in_image, const char *filename, +// const char **err); + +// NOT YET IMPLEMENTED: +// Loads multi-part OpenEXR deep image. +// Application must free memory of variables in DeepImage(image, offset_table) +// extern int LoadMultiPartDeepEXR(DeepImage **out_image, int num_parts, const +// char *filename, +// const char **err); + +// For emscripten. +// Loads single-frame OpenEXR image from memory. Assume EXR image contains +// RGB(A) channels. +// Returns negative value and may set error string in `err` when there's an +// error +// When there was an error message, Application must free `err` with +// FreeEXRErrorMessage() +extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, + const unsigned char *memory, size_t size, + const char **err); + +#ifdef __cplusplus +} +#endif + +#endif // TINYEXR_H_ + +#ifdef TINYEXR_IMPLEMENTATION +#ifndef TINYEXR_IMPLEMENTATION_DEIFNED +#define TINYEXR_IMPLEMENTATION_DEIFNED + +#include +#include +#include +#include +#include +#include + +// #include // debug + +#include +#include +#include + +#if __cplusplus > 199711L +// C++11 +#include + +#if TINYEXR_USE_THREAD +#include +#include +#endif + +#endif // __cplusplus > 199711L + +#if TINYEXR_USE_OPENMP +#include +#endif + +#if TINYEXR_USE_MINIZ +#else +// Issue #46. Please include your own zlib-compatible API header before +// including `tinyexr.h` +//#include "zlib.h" +#endif + +#if TINYEXR_USE_ZFP +#include "zfp.h" +#endif + +namespace tinyexr { + +#if __cplusplus > 199711L +// C++11 +typedef uint64_t tinyexr_uint64; +typedef int64_t tinyexr_int64; +#else +// Although `long long` is not a standard type pre C++11, assume it is defined +// as a compiler's extension. +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++11-long-long" +#endif +typedef unsigned long long tinyexr_uint64; +typedef long long tinyexr_int64; +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +#endif + +#if TINYEXR_USE_MINIZ + +namespace miniz { + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++11-long-long" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wpadded" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wc++11-extensions" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#pragma clang diagnostic ignored "-Wundef" + +#if __has_warning("-Wcomma") +#pragma clang diagnostic ignored "-Wcomma" +#endif + +#if __has_warning("-Wmacro-redefined") +#pragma clang diagnostic ignored "-Wmacro-redefined" +#endif + +#if __has_warning("-Wcast-qual") +#pragma clang diagnostic ignored "-Wcast-qual" +#endif + +#if __has_warning("-Wzero-as-null-pointer-constant") +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif + +#if __has_warning("-Wtautological-constant-compare") +#pragma clang diagnostic ignored "-Wtautological-constant-compare" +#endif + +#if __has_warning("-Wextra-semi-stmt") +#pragma clang diagnostic ignored "-Wextra-semi-stmt" +#endif + +#endif + +/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP + reading/writing/appending, PNG writing + See "unlicense" statement at the end of this file. + Rich Geldreich , last updated Oct. 13, 2013 + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: + http://www.ietf.org/rfc/rfc1951.txt + + Most API's defined in miniz.c are optional. For example, to disable the + archive related functions just define + MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO + (see the list below for more macros). + + * Change History + 10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major + release with Zip64 support (almost there!): + - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug + (thanks kahmyong.moon@hp.com) which could cause locate files to not find + files. This bug + would only have occured in earlier versions if you explicitly used this + flag, OR if you used mz_zip_extract_archive_file_to_heap() or + mz_zip_add_mem_to_archive_file_in_place() + (which used this flag). If you can't switch to v1.15 but want to fix + this bug, just remove the uses of this flag from both helper funcs (and of + course don't use the flag). + - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when + pUser_read_buf is not NULL and compressed size is > uncompressed size + - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract + compressed data from directory entries, to account for weird zipfiles which + contain zero-size compressed data on dir entries. + Hopefully this fix won't cause any issues on weird zip archives, + because it assumes the low 16-bits of zip external attributes are DOS + attributes (which I believe they always are in practice). + - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the + internal attributes, just the filename and external attributes + - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed + - Added cmake support for Linux builds which builds all the examples, + tested with clang v3.3 and gcc v4.6. + - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti + - Merged MZ_FORCEINLINE fix from hdeanclark + - Fix include before config #ifdef, thanks emil.brink + - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping + (super useful for OpenGL apps), and explicit control over the compression + level (so you can + set it to 1 for real-time compression). + - Merged in some compiler fixes from paulharris's github repro. + - Retested this build under Windows (VS 2010, including static analysis), + tcc 0.9.26, gcc v4.6 and clang v3.3. + - Added example6.c, which dumps an image of the mandelbrot set to a PNG + file. + - Modified example2 to help test the + MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more. + - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix + possible src file fclose() leak if alignment bytes+local header file write + faiiled + - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader(): + Was pushing the wrong central dir header offset, appears harmless in this + release, but it became a problem in the zip64 branch + 5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, + #include (thanks fermtect). + 5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix + mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit. + - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and + re-ran a randomized regression test on ~500k files. + - Eliminated a bunch of warnings when compiling with GCC 32-bit/64. + - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze + (static analysis) option and fixed all warnings (except for the silly + "Use of the comma-operator in a tested expression.." analysis warning, + which I purposely use to work around a MSVC compiler warning). + - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and + tested Linux executables. The codeblocks workspace is compatible with + Linux+Win32/x64. + - Added miniz_tester solution/project, which is a useful little app + derived from LZHAM's tester app that I use as part of the regression test. + - Ran miniz.c and tinfl.c through another series of regression testing on + ~500,000 files and archives. + - Modified example5.c so it purposely disables a bunch of high-level + functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the + MINIZ_NO_STDIO bug report.) + - Fix ftell() usage in examples so they exit with an error on files which + are too large (a limitation of the examples, not miniz itself). + 4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple + minor level_and_flags issues in the archive API's. + level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce + Dawson for the feedback/bug report. + 5/28/11 v1.11 - Added statement from unlicense.org + 5/27/11 v1.10 - Substantial compressor optimizations: + - Level 1 is now ~4x faster than before. The L1 compressor's throughput + now varies between 70-110MB/sec. on a + - Core i7 (actual throughput varies depending on the type of data, and x64 + vs. x86). + - Improved baseline L2-L9 compression perf. Also, greatly improved + compression perf. issues on some file types. + - Refactored the compression code for better readability and + maintainability. + - Added level 10 compression level (L10 has slightly better ratio than + level 9, but could have a potentially large + drop in throughput on some files). + 5/15/11 v1.09 - Initial stable release. + + * Low-level Deflate/Inflate implementation notes: + + Compression: Use the "tdefl" API's. The compressor supports raw, static, + and dynamic blocks, lazy or + greedy parsing, match length filtering, RLE-only, and Huffman-only streams. + It performs and compresses + approximately as well as zlib. + + Decompression: Use the "tinfl" API's. The entire decompressor is + implemented as a single function + coroutine: see tinfl_decompress(). It supports decompression into a 32KB + (or larger power of 2) wrapping buffer, or into a memory + block large enough to hold the entire file. + + The low-level tdefl/tinfl API's do not make any use of dynamic memory + allocation. + + * zlib-style API notes: + + miniz.c implements a fairly large subset of zlib. There's enough + functionality present for it to be a drop-in + zlib replacement in many apps: + The z_stream struct, optional memory allocation callbacks + deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound + inflateInit/inflateInit2/inflate/inflateEnd + compress, compress2, compressBound, uncompress + CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly + routines. + Supports raw deflate streams or standard zlib streams with adler-32 + checking. + + Limitations: + The callback API's are not implemented yet. No support for gzip headers or + zlib static dictionaries. + I've tried to closely emulate zlib's various flavors of stream flushing + and return status codes, but + there are no guarantees that miniz.c pulls this off perfectly. + + * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, + originally written by + Alex Evans. Supports 1-4 bytes/pixel images. + + * ZIP archive API notes: + + The ZIP archive API's where designed with simplicity and efficiency in + mind, with just enough abstraction to + get the job done with minimal fuss. There are simple API's to retrieve file + information, read files from + existing archives, create new archives, append new files to existing + archives, or clone archive data from + one archive to another. It supports archives located in memory or the heap, + on disk (using stdio.h), + or you can specify custom file read/write callbacks. + + - Archive reading: Just call this function to read a single file from a + disk archive: + + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const + char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + + For more complex cases, use the "mz_zip_reader" functions. Upon opening an + archive, the entire central + directory is located and read as-is into memory, and subsequent file access + only occurs when reading individual files. + + - Archives file scanning: The simple way is to use this function to scan a + loaded archive for a specific file: + + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, + const char *pComment, mz_uint flags); + + The locate operation can optionally check file comments too, which (as one + example) can be used to identify + multiple versions of the same file in an archive. This function uses a + simple linear search through the central + directory, so it's not very fast. + + Alternately, you can iterate through all the files in an archive (using + mz_zip_reader_get_num_files()) and + retrieve detailed info on each file by calling mz_zip_reader_file_stat(). + + - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer + immediately writes compressed file data + to disk and builds an exact image of the central directory in memory. The + central directory image is written + all at once at the end of the archive file when the archive is finalized. + + The archive writer can optionally align each file's local header and file + data to any power of 2 alignment, + which can be useful when the archive will be read from optical media. Also, + the writer supports placing + arbitrary data blobs at the very beginning of ZIP archives. Archives + written using either feature are still + readable by any ZIP tool. + + - Archive appending: The simple way to add a single file to an archive is + to call this function: + + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, + const char *pArchive_name, + const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 + comment_size, mz_uint level_and_flags); + + The archive will be created if it doesn't already exist, otherwise it'll be + appended to. + Note the appending is done in-place and is not an atomic operation, so if + something goes wrong + during the operation it's possible the archive could be left without a + central directory (although the local + file headers and file data will be fine, so the archive will be + recoverable). + + For more complex archive modification scenarios: + 1. The safest way is to use a mz_zip_reader to read the existing archive, + cloning only those bits you want to + preserve into a new archive using using the + mz_zip_writer_add_from_zip_reader() function (which compiles the + compressed file data as-is). When you're done, delete the old archive and + rename the newly written archive, and + you're done. This is safe but requires a bunch of temporary disk space or + heap memory. + + 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using + mz_zip_writer_init_from_reader(), + append new files as needed, then finalize the archive which will write an + updated central directory to the + original archive. (This is basically what + mz_zip_add_mem_to_archive_file_in_place() does.) There's a + possibility that the archive's central directory could be lost with this + method if anything goes wrong, though. + + - ZIP archive support limitations: + No zip64 or spanning support. Extraction functions can only handle + unencrypted, stored or deflated files. + Requires streams capable of seeking. + + * This is a header file library, like stb_image.c. To get only a header file, + either cut and paste the + below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then + include miniz.c from it. + + * Important: For best perf. be sure to customize the below macros for your + target platform: + #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 + #define MINIZ_LITTLE_ENDIAN 1 + #define MINIZ_HAS_64BIT_REGISTERS 1 + + * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before + including miniz.c to ensure miniz + uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be + able to process large files + (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). +*/ + +#ifndef MINIZ_HEADER_INCLUDED +#define MINIZ_HEADER_INCLUDED + +//#include + +// Defines to completely disable specific portions of miniz.c: +// If all macros here are defined the only functionality remaining will be +// CRC-32, adler-32, tinfl, and tdefl. + +// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on +// stdio for file I/O. +//#define MINIZ_NO_STDIO + +// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able +// to get the current time, or +// get/set file times, and the C run-time funcs that get/set times won't be +// called. +// The current downside is the times written to your archives will be from 1979. +#define MINIZ_NO_TIME + +// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. +#define MINIZ_NO_ARCHIVE_APIS + +// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive +// API's. +//#define MINIZ_NO_ARCHIVE_WRITING_APIS + +// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression +// API's. +//#define MINIZ_NO_ZLIB_APIS + +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent +// conflicts against stock zlib. +//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. +// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom +// user alloc/free/realloc +// callbacks to the zlib and archive API's, and a few stand-alone helper API's +// which don't provide custom user +// functions (such as tdefl_compress_mem_to_heap() and +// tinfl_decompress_mem_to_heap()) won't work. +//#define MINIZ_NO_MALLOC + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) +// TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc +// on Linux +#define MINIZ_NO_TIME +#endif + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) +//#include +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ + defined(__i386) || defined(__i486__) || defined(__i486) || \ + defined(i386) || defined(__ia64__) || defined(__x86_64__) +// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. +#define MINIZ_X86_OR_X64_CPU 1 +#endif + +#if defined(__sparcv9) +// Big endian +#else +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. +#define MINIZ_LITTLE_ENDIAN 1 +#endif +#endif + +#if MINIZ_X86_OR_X64_CPU +// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient +// integer loads and stores from unaligned addresses. +//#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES \ + 0 // disable to suppress compiler warnings +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || \ + defined(_LP64) || defined(__LP64__) || defined(__ia64__) || \ + defined(__x86_64__) +// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are +// reasonably fast (and don't involve compiler generated calls to helper +// functions). +#define MINIZ_HAS_64BIT_REGISTERS 1 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// ------------------- zlib-style API Definitions. + +// For more compatibility with zlib, miniz.c uses unsigned long for some +// parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! +typedef unsigned long mz_ulong; + +// mz_free() internally uses the MZ_FREE() macro (which by default calls free() +// unless you've modified the MZ_MALLOC macro) to release a block allocated from +// the heap. +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +// mz_adler32() returns the initial adler-32 value to use when called with +// ptr==NULL. +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +// mz_crc32() returns the initial CRC-32 value to use when called with +// ptr==NULL. +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +// Compression strategies. +enum { + MZ_DEFAULT_STRATEGY = 0, + MZ_FILTERED = 1, + MZ_HUFFMAN_ONLY = 2, + MZ_RLE = 3, + MZ_FIXED = 4 +}; + +// Method +#define MZ_DEFLATED 8 + +#ifndef MINIZ_NO_ZLIB_APIS + +// Heap allocation callbacks. +// Note that mz_alloc_func parameter types purpsosely differ from zlib's: +// items/size is size_t, not unsigned long. +typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, + size_t size); + +#define MZ_VERSION "9.1.15" +#define MZ_VERNUM 0x91F0 +#define MZ_VER_MAJOR 9 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 15 +#define MZ_VER_SUBREVISION 0 + +// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The +// other values are for advanced use (refer to the zlib docs). +enum { + MZ_NO_FLUSH = 0, + MZ_PARTIAL_FLUSH = 1, + MZ_SYNC_FLUSH = 2, + MZ_FULL_FLUSH = 3, + MZ_FINISH = 4, + MZ_BLOCK = 5 +}; + +// Return status codes. MZ_PARAM_ERROR is non-standard. +enum { + MZ_OK = 0, + MZ_STREAM_END = 1, + MZ_NEED_DICT = 2, + MZ_ERRNO = -1, + MZ_STREAM_ERROR = -2, + MZ_DATA_ERROR = -3, + MZ_MEM_ERROR = -4, + MZ_BUF_ERROR = -5, + MZ_VERSION_ERROR = -6, + MZ_PARAM_ERROR = -10000 +}; + +// Compression levels: 0-9 are the standard zlib-style levels, 10 is best +// possible compression (not zlib compatible, and may be very slow), +// MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. +enum { + MZ_NO_COMPRESSION = 0, + MZ_BEST_SPEED = 1, + MZ_BEST_COMPRESSION = 9, + MZ_UBER_COMPRESSION = 10, + MZ_DEFAULT_LEVEL = 6, + MZ_DEFAULT_COMPRESSION = -1 +}; + +// Window bits +#define MZ_DEFAULT_WINDOW_BITS 15 + +struct mz_internal_state; + +// Compression/decompression stream struct. +typedef struct mz_stream_s { + const unsigned char *next_in; // pointer to next byte to read + unsigned int avail_in; // number of bytes available at next_in + mz_ulong total_in; // total number of bytes consumed so far + + unsigned char *next_out; // pointer to next byte to write + unsigned int avail_out; // number of bytes that can be written to next_out + mz_ulong total_out; // total number of bytes produced so far + + char *msg; // error msg (unused) + struct mz_internal_state *state; // internal state, allocated by zalloc/zfree + + mz_alloc_func + zalloc; // optional heap allocation function (defaults to malloc) + mz_free_func zfree; // optional heap free function (defaults to free) + void *opaque; // heap alloc function user pointer + + int data_type; // data_type (unused) + mz_ulong adler; // adler32 of the source or uncompressed data + mz_ulong reserved; // not used +} mz_stream; + +typedef mz_stream *mz_streamp; + +// Returns the version string of miniz.c. +const char *mz_version(void); + +// mz_deflateInit() initializes a compressor with default options: +// Parameters: +// pStream must point to an initialized mz_stream struct. +// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. +// level 1 enables a specially optimized compression function that's been +// optimized purely for performance, not ratio. +// (This special func. is currently only enabled when +// MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if the input parameters are bogus. +// MZ_MEM_ERROR on out of memory. +int mz_deflateInit(mz_streamp pStream, int level); + +// mz_deflateInit2() is like mz_deflate(), except with more control: +// Additional parameters: +// method must be MZ_DEFLATED +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with +// zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no +// header or footer) +// mem_level must be between [1, 9] (it's checked but ignored by miniz.c) +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, + int mem_level, int strategy); + +// Quickly resets a compressor without having to reallocate anything. Same as +// calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). +int mz_deflateReset(mz_streamp pStream); + +// mz_deflate() compresses the input to output, consuming as much of the input +// and producing as much output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update +// the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or +// MZ_FINISH. +// Return values: +// MZ_OK on success (when flushing, or if more input is needed but not +// available, and/or there's more output to be written but the output buffer +// is full). +// MZ_STREAM_END if all input has been consumed and all output bytes have been +// written. Don't call mz_deflate() on the stream anymore. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input and/or +// output buffers are empty. (Fill up the input buffer or free up some output +// space and try again.) +int mz_deflate(mz_streamp pStream, int flush); + +// mz_deflateEnd() deinitializes a compressor: +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +int mz_deflateEnd(mz_streamp pStream); + +// mz_deflateBound() returns a (very) conservative upper bound on the amount of +// data that could be generated by deflate(), assuming flush is set to only +// MZ_NO_FLUSH or MZ_FINISH. +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + +// Single-call compression functions mz_compress() and mz_compress2(): +// Returns MZ_OK on success, or one of the error codes from mz_deflate() on +// failure. +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len); +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len, int level); + +// mz_compressBound() returns a (very) conservative upper bound on the amount of +// data that could be generated by calling mz_compress(). +mz_ulong mz_compressBound(mz_ulong source_len); + +// Initializes a decompressor. +int mz_inflateInit(mz_streamp pStream); + +// mz_inflateInit2() is like mz_inflateInit() with an additional option that +// controls the window size and whether or not the stream has been wrapped with +// a zlib header/footer: +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or +// -MZ_DEFAULT_WINDOW_BITS (raw deflate). +int mz_inflateInit2(mz_streamp pStream, int window_bits); + +// Decompresses the input stream to the output, consuming only as much of the +// input as needed, and writing as much to the output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update +// the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. +// On the first call, if flush is MZ_FINISH it's assumed the input and output +// buffers are both sized large enough to decompress the entire stream in a +// single call (this is slightly faster). +// MZ_FINISH implies that there are no more source bytes available beside +// what's already in the input buffer, and that the output buffer is large +// enough to hold the rest of the decompressed data. +// Return values: +// MZ_OK on success. Either more input is needed but not available, and/or +// there's more output to be written but the output buffer is full. +// MZ_STREAM_END if all needed input has been consumed and all output bytes +// have been written. For zlib streams, the adler-32 of the decompressed data +// has also been verified. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_DATA_ERROR if the deflate stream is invalid. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input buffer is +// empty but the inflater needs more input to continue, or if the output +// buffer is not large enough. Call mz_inflate() again +// with more input data, or with more room in the output buffer (except when +// using single call decompression, described above). +int mz_inflate(mz_streamp pStream, int flush); + +// Deinitializes a decompressor. +int mz_inflateEnd(mz_streamp pStream); + +// Single-call decompression. +// Returns MZ_OK on success, or one of the error codes from mz_inflate() on +// failure. +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len); + +// Returns a string description of the specified error code, or NULL if the +// error code is invalid. +const char *mz_error(int err); + +// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used +// as a drop-in replacement for the subset of zlib that miniz.c supports. +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you +// use zlib in the same project. +#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES +typedef unsigned char Byte; +typedef unsigned int uInt; +typedef mz_ulong uLong; +typedef Byte Bytef; +typedef uInt uIntf; +typedef char charf; +typedef int intf; +typedef void *voidpf; +typedef uLong uLongf; +typedef void *voidp; +typedef void *const voidpc; +#define Z_NULL 0 +#define Z_NO_FLUSH MZ_NO_FLUSH +#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH +#define Z_SYNC_FLUSH MZ_SYNC_FLUSH +#define Z_FULL_FLUSH MZ_FULL_FLUSH +#define Z_FINISH MZ_FINISH +#define Z_BLOCK MZ_BLOCK +#define Z_OK MZ_OK +#define Z_STREAM_END MZ_STREAM_END +#define Z_NEED_DICT MZ_NEED_DICT +#define Z_ERRNO MZ_ERRNO +#define Z_STREAM_ERROR MZ_STREAM_ERROR +#define Z_DATA_ERROR MZ_DATA_ERROR +#define Z_MEM_ERROR MZ_MEM_ERROR +#define Z_BUF_ERROR MZ_BUF_ERROR +#define Z_VERSION_ERROR MZ_VERSION_ERROR +#define Z_PARAM_ERROR MZ_PARAM_ERROR +#define Z_NO_COMPRESSION MZ_NO_COMPRESSION +#define Z_BEST_SPEED MZ_BEST_SPEED +#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION +#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION +#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY +#define Z_FILTERED MZ_FILTERED +#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY +#define Z_RLE MZ_RLE +#define Z_FIXED MZ_FIXED +#define Z_DEFLATED MZ_DEFLATED +#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS +#define alloc_func mz_alloc_func +#define free_func mz_free_func +#define internal_state mz_internal_state +#define z_stream mz_stream +#define deflateInit mz_deflateInit +#define deflateInit2 mz_deflateInit2 +#define deflateReset mz_deflateReset +#define deflate mz_deflate +#define deflateEnd mz_deflateEnd +#define deflateBound mz_deflateBound +#define compress mz_compress +#define compress2 mz_compress2 +#define compressBound mz_compressBound +#define inflateInit mz_inflateInit +#define inflateInit2 mz_inflateInit2 +#define inflate mz_inflate +#define inflateEnd mz_inflateEnd +#define uncompress mz_uncompress +#define crc32 mz_crc32 +#define adler32 mz_adler32 +#define MAX_WBITS 15 +#define MAX_MEM_LEVEL 9 +#define zError mz_error +#define ZLIB_VERSION MZ_VERSION +#define ZLIB_VERNUM MZ_VERNUM +#define ZLIB_VER_MAJOR MZ_VER_MAJOR +#define ZLIB_VER_MINOR MZ_VER_MINOR +#define ZLIB_VER_REVISION MZ_VER_REVISION +#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION +#define zlibVersion mz_version +#define zlib_version mz_version() +#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +#endif // MINIZ_NO_ZLIB_APIS + +// ------------------- Types and macros + +typedef unsigned char mz_uint8; +typedef signed short mz_int16; +typedef unsigned short mz_uint16; +typedef unsigned int mz_uint32; +typedef unsigned int mz_uint; +typedef long long mz_int64; +typedef unsigned long long mz_uint64; +typedef int mz_bool; + +#define MZ_FALSE (0) +#define MZ_TRUE (1) + +// An attempt to work around MSVC's spammy "warning C4127: conditional +// expression is constant" message. +#ifdef _MSC_VER +#define MZ_MACRO_END while (0, 0) +#else +#define MZ_MACRO_END while (0) +#endif + +// ------------------- ZIP archive reading/writing + +#ifndef MINIZ_NO_ARCHIVE_APIS + +enum { + MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024, + MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260, + MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256 +}; + +typedef struct { + mz_uint32 m_file_index; + mz_uint32 m_central_dir_ofs; + mz_uint16 m_version_made_by; + mz_uint16 m_version_needed; + mz_uint16 m_bit_flag; + mz_uint16 m_method; +#ifndef MINIZ_NO_TIME + time_t m_time; +#endif + mz_uint32 m_crc32; + mz_uint64 m_comp_size; + mz_uint64 m_uncomp_size; + mz_uint16 m_internal_attr; + mz_uint32 m_external_attr; + mz_uint64 m_local_header_ofs; + mz_uint32 m_comment_size; + char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; + char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; +} mz_zip_archive_file_stat; + +typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, + void *pBuf, size_t n); +typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, + const void *pBuf, size_t n); + +struct mz_zip_internal_state_tag; +typedef struct mz_zip_internal_state_tag mz_zip_internal_state; + +typedef enum { + MZ_ZIP_MODE_INVALID = 0, + MZ_ZIP_MODE_READING = 1, + MZ_ZIP_MODE_WRITING = 2, + MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 +} mz_zip_mode; + +typedef struct mz_zip_archive_tag { + mz_uint64 m_archive_size; + mz_uint64 m_central_directory_file_ofs; + mz_uint m_total_files; + mz_zip_mode m_zip_mode; + + mz_uint m_file_offset_alignment; + + mz_alloc_func m_pAlloc; + mz_free_func m_pFree; + mz_realloc_func m_pRealloc; + void *m_pAlloc_opaque; + + mz_file_read_func m_pRead; + mz_file_write_func m_pWrite; + void *m_pIO_opaque; + + mz_zip_internal_state *m_pState; + +} mz_zip_archive; + +typedef enum { + MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, + MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, + MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, + MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800 +} mz_zip_flags; + +// ZIP archive reading + +// Inits a ZIP archive reader. +// These functions read and validate the archive's central directory. +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, + mz_uint32 flags); +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, + size_t size, mz_uint32 flags); + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, + mz_uint32 flags); +#endif + +// Returns the total number of files in the archive. +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); + +// Returns detailed information about an archive file entry. +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, + mz_zip_archive_file_stat *pStat); + +// Determines if an archive file entry is a directory entry. +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, + mz_uint file_index); +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, + mz_uint file_index); + +// Retrieves the filename of an archive file entry. +// Returns the number of bytes written to pFilename, or if filename_buf_size is +// 0 this function returns the number of bytes needed to fully store the +// filename. +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, + char *pFilename, mz_uint filename_buf_size); + +// Attempts to locates a file in the archive's central directory. +// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH +// Returns -1 if the file cannot be found. +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, + const char *pComment, mz_uint flags); + +// Extracts a archive file to a memory buffer using no memory allocation. +mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, + mz_uint file_index, void *pBuf, + size_t buf_size, mz_uint flags, + void *pUser_read_buf, + size_t user_read_buf_size); +mz_bool mz_zip_reader_extract_file_to_mem_no_alloc( + mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, + mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); + +// Extracts a archive file to a memory buffer. +mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, + void *pBuf, size_t buf_size, + mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, + const char *pFilename, void *pBuf, + size_t buf_size, mz_uint flags); + +// Extracts a archive file to a dynamically allocated heap buffer. +void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, + size_t *pSize, mz_uint flags); +void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, + const char *pFilename, size_t *pSize, + mz_uint flags); + +// Extracts a archive file using a callback function to output the file's data. +mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, + mz_uint file_index, + mz_file_write_func pCallback, + void *pOpaque, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, + const char *pFilename, + mz_file_write_func pCallback, + void *pOpaque, mz_uint flags); + +#ifndef MINIZ_NO_STDIO +// Extracts a archive file to a disk file and sets its last accessed and +// modified times. +// This function only extracts files, not archive directory records. +mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, + const char *pDst_filename, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, + const char *pArchive_filename, + const char *pDst_filename, + mz_uint flags); +#endif + +// Ends archive reading, freeing all allocations, and closing the input archive +// file if mz_zip_reader_init_file() was used. +mz_bool mz_zip_reader_end(mz_zip_archive *pZip); + +// ZIP archive writing + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +// Inits a ZIP archive writer. +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, + size_t size_to_reserve_at_beginning, + size_t initial_allocation_size); + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, + mz_uint64 size_to_reserve_at_beginning); +#endif + +// Converts a ZIP archive reader object into a writer object, to allow efficient +// in-place file appends to occur on an existing archive. +// For archives opened using mz_zip_reader_init_file, pFilename must be the +// archive's filename so it can be reopened for writing. If the file can't be +// reopened, mz_zip_reader_end() will be called. +// For archives opened using mz_zip_reader_init_mem, the memory block must be +// growable using the realloc callback (which defaults to realloc unless you've +// overridden it). +// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's +// user provided m_pWrite function cannot be NULL. +// Note: In-place archive modification is not recommended unless you know what +// you're doing, because if execution stops or something goes wrong before +// the archive is finalized the file's central directory will be hosed. +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, + const char *pFilename); + +// Adds the contents of a memory buffer to an archive. These functions record +// the current local time into the archive. +// To add a directory entry, call this method with an archive name ending in a +// forwardslash with empty buffer. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, +// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or +// just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, + const void *pBuf, size_t buf_size, + mz_uint level_and_flags); +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, + const char *pArchive_name, const void *pBuf, + size_t buf_size, const void *pComment, + mz_uint16 comment_size, + mz_uint level_and_flags, mz_uint64 uncomp_size, + mz_uint32 uncomp_crc32); + +#ifndef MINIZ_NO_STDIO +// Adds the contents of a disk file to an archive. This function also records +// the disk file's modified time into the archive. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, +// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or +// just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, + const char *pSrc_filename, const void *pComment, + mz_uint16 comment_size, mz_uint level_and_flags); +#endif + +// Adds a file to an archive by fully cloning the data from another archive. +// This function fully clones the source file's compressed data (no +// recompression), along with its full filename, extra data, and comment fields. +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, + mz_zip_archive *pSource_zip, + mz_uint file_index); + +// Finalizes the archive by writing the central directory records followed by +// the end of central directory record. +// After an archive is finalized, the only valid call on the mz_zip_archive +// struct is mz_zip_writer_end(). +// An archive must be manually finalized by calling this function for it to be +// valid. +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, + size_t *pSize); + +// Ends archive writing, freeing all allocations, and closing the output file if +// mz_zip_writer_init_file() was used. +// Note for the archive to be valid, it must have been finalized before ending. +mz_bool mz_zip_writer_end(mz_zip_archive *pZip); + +// Misc. high-level helper functions: + +// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) +// appends a memory blob to a ZIP archive. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, +// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or +// just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_add_mem_to_archive_file_in_place( + const char *pZip_filename, const char *pArchive_name, const void *pBuf, + size_t buf_size, const void *pComment, mz_uint16 comment_size, + mz_uint level_and_flags); + +// Reads a single file from an archive into a heap block. +// Returns NULL on failure. +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, + const char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + +// ------------------- Low-level Decompression API Definitions + +// Decompression flags used by tinfl_decompress(). +// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and +// ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the +// input is a raw deflate stream. +// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available +// beyond the end of the supplied input buffer. If clear, the input buffer +// contains all remaining input. +// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large +// enough to hold the entire decompressed stream. If clear, the output buffer is +// at least the size of the dictionary (typically 32KB). +// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the +// decompressed bytes. +enum { + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 +}; + +// High level decompression functions: +// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block +// allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data +// to decompress. +// On return: +// Function returns a pointer to the decompressed data, or NULL on failure. +// *pOut_len will be set to the decompressed data's size, which could be larger +// than src_buf_len on uncompressible data. +// The caller must call mz_free() on the returned block when it's no longer +// needed. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, + size_t *pOut_len, int flags); + +// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block +// in memory. +// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes +// written on success. +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, + const void *pSrc_buf, size_t src_buf_len, + int flags); + +// tinfl_decompress_mem_to_callback() decompresses a block in memory to an +// internal 32KB buffer, and a user provided callback function will be called to +// flush the buffer. +// Returns 1 on success or 0 on failure. +typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, + tinfl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags); + +struct tinfl_decompressor_tag; +typedef struct tinfl_decompressor_tag tinfl_decompressor; + +// Max size of LZ dictionary. +#define TINFL_LZ_DICT_SIZE 32768 + +// Return status. +typedef enum { + TINFL_STATUS_BAD_PARAM = -3, + TINFL_STATUS_ADLER32_MISMATCH = -2, + TINFL_STATUS_FAILED = -1, + TINFL_STATUS_DONE = 0, + TINFL_STATUS_NEEDS_MORE_INPUT = 1, + TINFL_STATUS_HAS_MORE_OUTPUT = 2 +} tinfl_status; + +// Initializes the decompressor to its initial state. +#define tinfl_init(r) \ + do { \ + (r)->m_state = 0; \ + } \ + MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 + +// Main low-level decompressor coroutine function. This is the only function +// actually needed for decompression. All the other functions are just +// high-level helpers for improved usability. +// This is a universal API, i.e. it can be used as a building block to build any +// desired higher level decompression API. In the limit case, it can be called +// once per every byte input or output. +tinfl_status tinfl_decompress(tinfl_decompressor *r, + const mz_uint8 *pIn_buf_next, + size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, + mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, + const mz_uint32 decomp_flags); + +// Internal/private bits follow. +enum { + TINFL_MAX_HUFF_TABLES = 3, + TINFL_MAX_HUFF_SYMBOLS_0 = 288, + TINFL_MAX_HUFF_SYMBOLS_1 = 32, + TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, + TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; + +typedef struct { + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], + m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS +#define TINFL_USE_64BIT_BITBUF 1 +#endif + +#if TINFL_USE_64BIT_BITBUF +typedef mz_uint64 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (64) +#else +typedef mz_uint32 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (32) +#endif + +struct tinfl_decompressor_tag { + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, + m_check_adler32, m_dist, m_counter, m_num_extra, + m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], + m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; + +// ------------------- Low-level Compression API Definitions + +// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly +// slower, and raw/dynamic blocks will be output more frequently). +#define TDEFL_LESS_MEMORY 0 + +// tdefl_init() compression flags logically OR'd together (low 12 bits contain +// the max. number of probes per dictionary search): +// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes +// per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap +// compression), 4095=Huffman+LZ (slowest/best compression). +enum { + TDEFL_HUFFMAN_ONLY = 0, + TDEFL_DEFAULT_MAX_PROBES = 128, + TDEFL_MAX_PROBES_MASK = 0xFFF +}; + +// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before +// the deflate data, and the Adler-32 of the source data at the end. Otherwise, +// you'll get raw deflate data. +// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even +// when not writing zlib headers). +// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more +// efficient lazy parsing. +// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's +// initialization time to the minimum, but the output may vary from run to run +// given the same input (depending on the contents of memory). +// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) +// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. +// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. +// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. +// The low 12 bits are reserved to control the max # of hash probes per +// dictionary lookup (see TDEFL_MAX_PROBES_MASK). +enum { + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 +}; + +// High level compression functions: +// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block +// allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of source block to compress. +// flags: The max match finder probes (default is 128) logically OR'd against +// the above flags. Higher probes are slower but improve compression. +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pOut_len will be set to the compressed data's size, which could be larger +// than src_buf_len on uncompressible data. +// The caller must free() the returned block when it's no longer needed. +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, + size_t *pOut_len, int flags); + +// tdefl_compress_mem_to_mem() compresses a block in memory to another block in +// memory. +// Returns 0 on failure. +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, + const void *pSrc_buf, size_t src_buf_len, + int flags); + +// Compresses an image to a compressed PNG file in memory. +// On entry: +// pImage, w, h, and num_chans describe the image to compress. num_chans may be +// 1, 2, 3, or 4. +// The image pitch in bytes per scanline will be w*num_chans. The leftmost +// pixel on the top scanline is stored first in memory. +// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, +// MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL +// If flip is true, the image will be flipped on the Y axis (useful for OpenGL +// apps). +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pLen_out will be set to the size of the PNG image file. +// The caller must mz_free() the returned heap block (which will typically be +// larger than *pLen_out) when it's no longer needed. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, + int h, int num_chans, + size_t *pLen_out, + mz_uint level, mz_bool flip); +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, + int num_chans, size_t *pLen_out); + +// Output stream interface. The compressor uses this interface to write +// compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. +typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, + void *pUser); + +// tdefl_compress_mem_to_output() compresses a block to an output stream. The +// above helpers use this function internally. +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, + tdefl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags); + +enum { + TDEFL_MAX_HUFF_TABLES = 3, + TDEFL_MAX_HUFF_SYMBOLS_0 = 288, + TDEFL_MAX_HUFF_SYMBOLS_1 = 32, + TDEFL_MAX_HUFF_SYMBOLS_2 = 19, + TDEFL_LZ_DICT_SIZE = 32768, + TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, + TDEFL_MIN_MATCH_LEN = 3, + TDEFL_MAX_MATCH_LEN = 258 +}; + +// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed +// output block (using static/fixed Huffman codes). +#if TDEFL_LESS_MEMORY +enum { + TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, + TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, + TDEFL_MAX_HUFF_SYMBOLS = 288, + TDEFL_LZ_HASH_BITS = 12, + TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, + TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, + TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS +}; +#else +enum { + TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, + TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, + TDEFL_MAX_HUFF_SYMBOLS = 288, + TDEFL_LZ_HASH_BITS = 15, + TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, + TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, + TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS +}; +#endif + +// The low-level tdefl functions below may be used directly if the above helper +// functions aren't flexible enough. The low-level functions don't make any heap +// allocations, unlike the above helper functions. +typedef enum { + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1 +} tdefl_status; + +// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums +typedef enum { + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 +} tdefl_flush; + +// tdefl's compression state structure. +typedef struct { + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, + m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, + m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, + m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; + +// Initializes the compressor. +// There is no corresponding deinit() function because the tdefl API's do not +// dynamically allocate memory. +// pBut_buf_func: If NULL, output data will be supplied to the specified +// callback. In this case, the user should call the tdefl_compress_buffer() API +// for compression. +// If pBut_buf_func is NULL the user should always call the tdefl_compress() +// API. +// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, +// etc.) +tdefl_status tdefl_init(tdefl_compressor *d, + tdefl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags); + +// Compresses a block of data, consuming as much of the specified input buffer +// as possible, and writing as much compressed data to the specified output +// buffer as possible. +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, + size_t *pIn_buf_size, void *pOut_buf, + size_t *pOut_buf_size, tdefl_flush flush); + +// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a +// non-NULL tdefl_put_buf_func_ptr. +// tdefl_compress_buffer() always consumes the entire input buffer. +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, + size_t in_buf_size, tdefl_flush flush); + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); +mz_uint32 tdefl_get_adler32(tdefl_compressor *d); + +// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't +// defined, because it uses some of its macros. +#ifndef MINIZ_NO_ZLIB_APIS +// Create tdefl_compress() flags given zlib-style compression parameters. +// level may range from [0,10] (where 10 is absolute max compression, but may be +// much slower on some files) +// window_bits may be -15 (raw deflate) or 15 (zlib) +// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, +// MZ_RLE, or MZ_FIXED +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, + int strategy); +#endif // #ifndef MINIZ_NO_ZLIB_APIS + +#ifdef __cplusplus +} +#endif + +#endif // MINIZ_HEADER_INCLUDED + +// ------------------- End of Header: Implementation follows. (If you only want +// the header, define MINIZ_HEADER_FILE_ONLY.) + +#ifndef MINIZ_HEADER_FILE_ONLY + +typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1]; +typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1]; +typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 1 : -1]; + +//#include +//#include + +#define MZ_ASSERT(x) assert(x) + +#ifdef MINIZ_NO_MALLOC +#define MZ_MALLOC(x) NULL +#define MZ_FREE(x) (void)x, ((void)0) +#define MZ_REALLOC(p, x) NULL +#else +#define MZ_MALLOC(x) malloc(x) +#define MZ_FREE(x) free(x) +#define MZ_REALLOC(p, x) realloc(p, x) +#endif + +#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +#define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) +#define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else +#define MZ_READ_LE16(p) \ + ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \ + ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) +#define MZ_READ_LE32(p) \ + ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \ + ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | \ + ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | \ + ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif + +#ifdef _MSC_VER +#define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define MZ_FORCEINLINE inline __attribute__((__always_inline__)) +#else +#define MZ_FORCEINLINE inline +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// ------------------- zlib-style API's + +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) { + mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); + size_t block_len = buf_len % 5552; + if (!ptr) return MZ_ADLER32_INIT; + while (buf_len) { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { + s1 += ptr[0], s2 += s1; + s1 += ptr[1], s2 += s1; + s1 += ptr[2], s2 += s1; + s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; + s1 += ptr[5], s2 += s1; + s1 += ptr[6], s2 += s1; + s1 += ptr[7], s2 += s1; + } + for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; + buf_len -= block_len; + block_len = 5552; + } + return (s2 << 16) + s1; +} + +// Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C +// implementation that balances processor cache usage against speed": +// http://www.geocities.com/malbrain/ +mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) { + static const mz_uint32 s_crc32[16] = { + 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, + 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, + 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c}; + mz_uint32 crcu32 = (mz_uint32)crc; + if (!ptr) return MZ_CRC32_INIT; + crcu32 = ~crcu32; + while (buf_len--) { + mz_uint8 b = *ptr++; + crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; + crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; + } + return ~crcu32; +} + +void mz_free(void *p) { MZ_FREE(p); } + +#ifndef MINIZ_NO_ZLIB_APIS + +static void *def_alloc_func(void *opaque, size_t items, size_t size) { + (void)opaque, (void)items, (void)size; + return MZ_MALLOC(items * size); +} +static void def_free_func(void *opaque, void *address) { + (void)opaque, (void)address; + MZ_FREE(address); +} +// static void *def_realloc_func(void *opaque, void *address, size_t items, +// size_t size) { +// (void)opaque, (void)address, (void)items, (void)size; +// return MZ_REALLOC(address, items * size); +//} + +const char *mz_version(void) { return MZ_VERSION; } + +int mz_deflateInit(mz_streamp pStream, int level) { + return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, + MZ_DEFAULT_STRATEGY); +} + +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, + int mem_level, int strategy) { + tdefl_compressor *pComp; + mz_uint comp_flags = + TDEFL_COMPUTE_ADLER32 | + tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); + + if (!pStream) return MZ_STREAM_ERROR; + if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || + ((window_bits != MZ_DEFAULT_WINDOW_BITS) && + (-window_bits != MZ_DEFAULT_WINDOW_BITS))) + return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = MZ_ADLER32_INIT; + pStream->msg = NULL; + pStream->reserved = 0; + pStream->total_in = 0; + pStream->total_out = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, + sizeof(tdefl_compressor)); + if (!pComp) return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pComp; + + if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) { + mz_deflateEnd(pStream); + return MZ_PARAM_ERROR; + } + + return MZ_OK; +} + +int mz_deflateReset(mz_streamp pStream) { + if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || + (!pStream->zfree)) + return MZ_STREAM_ERROR; + pStream->total_in = pStream->total_out = 0; + tdefl_init((tdefl_compressor *)pStream->state, NULL, NULL, + ((tdefl_compressor *)pStream->state)->m_flags); + return MZ_OK; +} + +int mz_deflate(mz_streamp pStream, int flush) { + size_t in_bytes, out_bytes; + mz_ulong orig_total_in, orig_total_out; + int mz_status = MZ_OK; + + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || + (!pStream->next_out)) + return MZ_STREAM_ERROR; + if (!pStream->avail_out) return MZ_BUF_ERROR; + + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + + if (((tdefl_compressor *)pStream->state)->m_prev_return_status == + TDEFL_STATUS_DONE) + return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR; + + orig_total_in = pStream->total_in; + orig_total_out = pStream->total_out; + for (;;) { + tdefl_status defl_status; + in_bytes = pStream->avail_in; + out_bytes = pStream->avail_out; + + defl_status = tdefl_compress((tdefl_compressor *)pStream->state, + pStream->next_in, &in_bytes, pStream->next_out, + &out_bytes, (tdefl_flush)flush); + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state); + + pStream->next_out += (mz_uint)out_bytes; + pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (defl_status < 0) { + mz_status = MZ_STREAM_ERROR; + break; + } else if (defl_status == TDEFL_STATUS_DONE) { + mz_status = MZ_STREAM_END; + break; + } else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != MZ_FINISH)) { + if ((flush) || (pStream->total_in != orig_total_in) || + (pStream->total_out != orig_total_out)) + break; + return MZ_BUF_ERROR; // Can't make forward progress without some input. + } + } + return mz_status; +} + +int mz_deflateEnd(mz_streamp pStream) { + if (!pStream) return MZ_STREAM_ERROR; + if (pStream->state) { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) { + (void)pStream; + // This is really over conservative. (And lame, but it's actually pretty + // tricky to compute a true upper bound given the way tdefl's blocking works.) + return MZ_MAX(128 + (source_len * 110) / 100, + 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); +} + +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len, int level) { + int status; + mz_stream stream; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_deflateInit(&stream, level); + if (status != MZ_OK) return status; + + status = mz_deflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) { + mz_deflateEnd(&stream); + return (status == MZ_OK) ? MZ_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return mz_deflateEnd(&stream); +} + +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len) { + return mz_compress2(pDest, pDest_len, pSource, source_len, + MZ_DEFAULT_COMPRESSION); +} + +mz_ulong mz_compressBound(mz_ulong source_len) { + return mz_deflateBound(NULL, source_len); +} + +typedef struct { + tinfl_decompressor m_decomp; + mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; + int m_window_bits; + mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; + tinfl_status m_last_status; +} inflate_state; + +int mz_inflateInit2(mz_streamp pStream, int window_bits) { + inflate_state *pDecomp; + if (!pStream) return MZ_STREAM_ERROR; + if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && + (-window_bits != MZ_DEFAULT_WINDOW_BITS)) + return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pDecomp = (inflate_state *)pStream->zalloc(pStream->opaque, 1, + sizeof(inflate_state)); + if (!pDecomp) return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pDecomp; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + pDecomp->m_window_bits = window_bits; + + return MZ_OK; +} + +int mz_inflateInit(mz_streamp pStream) { + return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); +} + +int mz_inflate(mz_streamp pStream, int flush) { + inflate_state *pState; + mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; + size_t in_bytes, out_bytes, orig_avail_in; + tinfl_status status; + + if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) + return MZ_STREAM_ERROR; + + pState = (inflate_state *)pStream->state; + if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; + orig_avail_in = pStream->avail_in; + + first_call = pState->m_first_call; + pState->m_first_call = 0; + if (pState->m_last_status < 0) return MZ_DATA_ERROR; + + if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + pState->m_has_flushed |= (flush == MZ_FINISH); + + if ((flush == MZ_FINISH) && (first_call)) { + // MZ_FINISH on the first call implies that the input and output buffers are + // large enough to hold the entire compressed/decompressed file. + decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + in_bytes = pStream->avail_in; + out_bytes = pStream->avail_out; + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, + pStream->next_out, pStream->next_out, &out_bytes, + decomp_flags); + pState->m_last_status = status; + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + pStream->next_out += (mz_uint)out_bytes; + pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (status < 0) + return MZ_DATA_ERROR; + else if (status != TINFL_STATUS_DONE) { + pState->m_last_status = TINFL_STATUS_FAILED; + return MZ_BUF_ERROR; + } + return MZ_STREAM_END; + } + // flush != MZ_FINISH then we must assume there's more input. + if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; + + if (pState->m_dict_avail) { + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; + pStream->avail_out -= n; + pStream->total_out += n; + pState->m_dict_avail -= n; + pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + return ((pState->m_last_status == TINFL_STATUS_DONE) && + (!pState->m_dict_avail)) + ? MZ_STREAM_END + : MZ_OK; + } + + for (;;) { + in_bytes = pStream->avail_in; + out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; + + status = tinfl_decompress( + &pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, + pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); + pState->m_last_status = status; + + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + + pState->m_dict_avail = (mz_uint)out_bytes; + + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; + pStream->avail_out -= n; + pStream->total_out += n; + pState->m_dict_avail -= n; + pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + + if (status < 0) + return MZ_DATA_ERROR; // Stream is corrupted (there could be some + // uncompressed data left in the output dictionary - + // oh well). + else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) + return MZ_BUF_ERROR; // Signal caller that we can't make forward progress + // without supplying more input or by setting flush + // to MZ_FINISH. + else if (flush == MZ_FINISH) { + // The output buffer MUST be large to hold the remaining uncompressed data + // when flush==MZ_FINISH. + if (status == TINFL_STATUS_DONE) + return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; + // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's + // at least 1 more byte on the way. If there's no more room left in the + // output buffer then something is wrong. + else if (!pStream->avail_out) + return MZ_BUF_ERROR; + } else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || + (!pStream->avail_out) || (pState->m_dict_avail)) + break; + } + + return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) + ? MZ_STREAM_END + : MZ_OK; +} + +int mz_inflateEnd(mz_streamp pStream) { + if (!pStream) return MZ_STREAM_ERROR; + if (pStream->state) { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, + const unsigned char *pSource, mz_ulong source_len) { + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR + : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); +} + +const char *mz_error(int err) { + static struct { + int m_err; + const char *m_pDesc; + } s_error_descs[] = {{MZ_OK, ""}, + {MZ_STREAM_END, "stream end"}, + {MZ_NEED_DICT, "need dictionary"}, + {MZ_ERRNO, "file error"}, + {MZ_STREAM_ERROR, "stream error"}, + {MZ_DATA_ERROR, "data error"}, + {MZ_MEM_ERROR, "out of memory"}, + {MZ_BUF_ERROR, "buf error"}, + {MZ_VERSION_ERROR, "version error"}, + {MZ_PARAM_ERROR, "parameter error"}}; + mz_uint i; + for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) + if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; + return NULL; +} + +#endif // MINIZ_NO_ZLIB_APIS + +// ------------------- Low-level Decompression (completely independent from all +// compression API's) + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN \ + switch (r->m_state) { \ + case 0: +#define TINFL_CR_RETURN(state_index, result) \ + do { \ + status = result; \ + r->m_state = state_index; \ + goto common_exit; \ + case state_index:; \ + } \ + MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) \ + do { \ + for (;;) { \ + TINFL_CR_RETURN(state_index, result); \ + } \ + } \ + MZ_MACRO_END +#define TINFL_CR_FINISH } + +// TODO: If the caller has indicated that there's no more input, and we attempt +// to read beyond the input buf, then something is wrong with the input because +// the inflator never +// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of +// the stream with 0's in this scenario. +#define TINFL_GET_BYTE(state_index, c) \ + do { \ + if (pIn_buf_cur >= pIn_buf_end) { \ + for (;;) { \ + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \ + TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \ + if (pIn_buf_cur < pIn_buf_end) { \ + c = *pIn_buf_cur++; \ + break; \ + } \ + } else { \ + c = 0; \ + break; \ + } \ + } \ + } else \ + c = *pIn_buf_cur++; \ + } \ + MZ_MACRO_END + +#define TINFL_NEED_BITS(state_index, n) \ + do { \ + mz_uint c; \ + TINFL_GET_BYTE(state_index, c); \ + bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ + num_bits += 8; \ + } while (num_bits < (mz_uint)(n)) +#define TINFL_SKIP_BITS(state_index, n) \ + do { \ + if (num_bits < (mz_uint)(n)) { \ + TINFL_NEED_BITS(state_index, n); \ + } \ + bit_buf >>= (n); \ + num_bits -= (n); \ + } \ + MZ_MACRO_END +#define TINFL_GET_BITS(state_index, b, n) \ + do { \ + if (num_bits < (mz_uint)(n)) { \ + TINFL_NEED_BITS(state_index, n); \ + } \ + b = bit_buf & ((1 << (n)) - 1); \ + bit_buf >>= (n); \ + num_bits -= (n); \ + } \ + MZ_MACRO_END + +// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes +// remaining in the input buffer falls below 2. +// It reads just enough bytes from the input stream that are needed to decode +// the next Huffman code (and absolutely no more). It works by trying to fully +// decode a +// Huffman code by using whatever bits are currently present in the bit buffer. +// If this fails, it reads another byte, and tries again until it succeeds or +// until the +// bit buffer contains >=15 bits (deflate's max. Huffman code size). +#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ + do { \ + temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ + if (temp >= 0) { \ + code_len = temp >> 9; \ + if ((code_len) && (num_bits >= code_len)) break; \ + } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while ((temp < 0) && (num_bits >= (code_len + 1))); \ + if (temp >= 0) break; \ + } \ + TINFL_GET_BYTE(state_index, c); \ + bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ + num_bits += 8; \ + } while (num_bits < 15); + +// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex +// than you would initially expect because the zlib API expects the decompressor +// to never read +// beyond the final byte of the deflate stream. (In other words, when this macro +// wants to read another byte from the input, it REALLY needs another byte in +// order to fully +// decode the next Huffman code.) Handling this properly is particularly +// important on raw deflate (non-zlib) streams, which aren't followed by a byte +// aligned adler-32. +// The slow path is only executed at the very end of the input buffer. +#define TINFL_HUFF_DECODE(state_index, sym, pHuff) \ + do { \ + int temp; \ + mz_uint code_len, c; \ + if (num_bits < 15) { \ + if ((pIn_buf_end - pIn_buf_cur) < 2) { \ + TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ + } else { \ + bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | \ + (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \ + pIn_buf_cur += 2; \ + num_bits += 16; \ + } \ + } \ + if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= \ + 0) \ + code_len = temp >> 9, temp &= 511; \ + else { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while (temp < 0); \ + } \ + sym = temp; \ + bit_buf >>= code_len; \ + num_bits -= code_len; \ + } \ + MZ_MACRO_END + +tinfl_status tinfl_decompress(tinfl_decompressor *r, + const mz_uint8 *pIn_buf_next, + size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, + mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, + const mz_uint32 decomp_flags) { + static const int s_length_base[31] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const int s_length_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, + 4, 4, 5, 5, 5, 5, 0, 0, 0}; + static const int s_dist_base[32] = { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, + 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, + 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0}; + static const int s_dist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; + static const mz_uint8 s_length_dezigzag[19] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + static const int s_min_table_sizes[3] = {257, 1, 4}; + + tinfl_status status = TINFL_STATUS_FAILED; + mz_uint32 num_bits, dist, counter, num_extra; + tinfl_bit_buf_t bit_buf; + const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = + pIn_buf_next + *pIn_buf_size; + mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = + pOut_buf_next + *pOut_buf_size; + size_t out_buf_size_mask = + (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) + ? (size_t)-1 + : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, + dist_from_out_buf_start; + + // Ensure the output buffer's size is a power of 2, unless the output buffer + // is large enough to hold the entire output file (in which case it doesn't + // matter). + if (((out_buf_size_mask + 1) & out_buf_size_mask) || + (pOut_buf_next < pOut_buf_start)) { + *pIn_buf_size = *pOut_buf_size = 0; + return TINFL_STATUS_BAD_PARAM; + } + + num_bits = r->m_num_bits; + bit_buf = r->m_bit_buf; + dist = r->m_dist; + counter = r->m_counter; + num_extra = r->m_num_extra; + dist_from_out_buf_start = r->m_dist_from_out_buf_start; + TINFL_CR_BEGIN + + bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; + r->m_z_adler32 = r->m_check_adler32 = 1; + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { + TINFL_GET_BYTE(1, r->m_zhdr0); + TINFL_GET_BYTE(2, r->m_zhdr1); + counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || + (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); + if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || + ((out_buf_size_mask + 1) < + (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4))))); + if (counter) { + TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); + } + } + + do { + TINFL_GET_BITS(3, r->m_final, 3); + r->m_type = r->m_final >> 1; + if (r->m_type == 0) { + TINFL_SKIP_BITS(5, num_bits & 7); + for (counter = 0; counter < 4; ++counter) { + if (num_bits) + TINFL_GET_BITS(6, r->m_raw_header[counter], 8); + else + TINFL_GET_BYTE(7, r->m_raw_header[counter]); + } + if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != + (mz_uint)(0xFFFF ^ + (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { + TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); + } + while ((counter) && (num_bits)) { + TINFL_GET_BITS(51, dist, 8); + while (pOut_buf_cur >= pOut_buf_end) { + TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = (mz_uint8)dist; + counter--; + } + while (counter) { + size_t n; + while (pOut_buf_cur >= pOut_buf_end) { + TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); + } + while (pIn_buf_cur >= pIn_buf_end) { + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { + TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); + } else { + TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); + } + } + n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), + (size_t)(pIn_buf_end - pIn_buf_cur)), + counter); + TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); + pIn_buf_cur += n; + pOut_buf_cur += n; + counter -= (mz_uint)n; + } + } else if (r->m_type == 3) { + TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); + } else { + if (r->m_type == 1) { + mz_uint8 *p = r->m_tables[0].m_code_size; + mz_uint i; + r->m_table_sizes[0] = 288; + r->m_table_sizes[1] = 32; + TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + for (i = 0; i <= 143; ++i) *p++ = 8; + for (; i <= 255; ++i) *p++ = 9; + for (; i <= 279; ++i) *p++ = 7; + for (; i <= 287; ++i) *p++ = 8; + } else { + for (counter = 0; counter < 3; counter++) { + TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); + r->m_table_sizes[counter] += s_min_table_sizes[counter]; + } + MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); + for (counter = 0; counter < r->m_table_sizes[2]; counter++) { + mz_uint s; + TINFL_GET_BITS(14, s, 3); + r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; + } + r->m_table_sizes[2] = 19; + } + for (; (int)r->m_type >= 0; r->m_type--) { + int tree_next, tree_cur; + tinfl_huff_table *pTable; + mz_uint i, j, used_syms, total, sym_index, next_code[17], + total_syms[16]; + pTable = &r->m_tables[r->m_type]; + MZ_CLEAR_OBJ(total_syms); + MZ_CLEAR_OBJ(pTable->m_look_up); + MZ_CLEAR_OBJ(pTable->m_tree); + for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) + total_syms[pTable->m_code_size[i]]++; + used_syms = 0, total = 0; + next_code[0] = next_code[1] = 0; + for (i = 1; i <= 15; ++i) { + used_syms += total_syms[i]; + next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); + } + if ((65536 != total) && (used_syms > 1)) { + TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); + } + for (tree_next = -1, sym_index = 0; + sym_index < r->m_table_sizes[r->m_type]; ++sym_index) { + mz_uint rev_code = 0, l, cur_code, + code_size = pTable->m_code_size[sym_index]; + if (!code_size) continue; + cur_code = next_code[code_size]++; + for (l = code_size; l > 0; l--, cur_code >>= 1) + rev_code = (rev_code << 1) | (cur_code & 1); + if (code_size <= TINFL_FAST_LOOKUP_BITS) { + mz_int16 k = (mz_int16)((code_size << 9) | sym_index); + while (rev_code < TINFL_FAST_LOOKUP_SIZE) { + pTable->m_look_up[rev_code] = k; + rev_code += (1 << code_size); + } + continue; + } + if (0 == + (tree_cur = pTable->m_look_up[rev_code & + (TINFL_FAST_LOOKUP_SIZE - 1)])) { + pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = + (mz_int16)tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); + for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) { + tree_cur -= ((rev_code >>= 1) & 1); + if (!pTable->m_tree[-tree_cur - 1]) { + pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; + tree_cur = tree_next; + tree_next -= 2; + } else + tree_cur = pTable->m_tree[-tree_cur - 1]; + } + tree_cur -= ((rev_code >>= 1) & 1); + pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; + } + if (r->m_type == 2) { + for (counter = 0; + counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) { + mz_uint s; + TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); + if (dist < 16) { + r->m_len_codes[counter++] = (mz_uint8)dist; + continue; + } + if ((dist == 16) && (!counter)) { + TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); + } + num_extra = "\02\03\07"[dist - 16]; + TINFL_GET_BITS(18, s, num_extra); + s += "\03\03\013"[dist - 16]; + TINFL_MEMSET(r->m_len_codes + counter, + (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); + counter += s; + } + if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) { + TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); + } + TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, + r->m_table_sizes[0]); + TINFL_MEMCPY(r->m_tables[1].m_code_size, + r->m_len_codes + r->m_table_sizes[0], + r->m_table_sizes[1]); + } + } + for (;;) { + mz_uint8 *pSrc; + for (;;) { + if (((pIn_buf_end - pIn_buf_cur) < 4) || + ((pOut_buf_end - pOut_buf_cur) < 2)) { + TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); + if (counter >= 256) break; + while (pOut_buf_cur >= pOut_buf_end) { + TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = (mz_uint8)counter; + } else { + int sym2; + mz_uint code_len; +#if TINFL_USE_64BIT_BITBUF + if (num_bits < 30) { + bit_buf |= + (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 4; + num_bits += 32; + } +#else + if (num_bits < 15) { + bit_buf |= + (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 2; + num_bits += 16; + } +#endif + if ((sym2 = + r->m_tables[0] + .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= + 0) + code_len = sym2 >> 9; + else { + code_len = TINFL_FAST_LOOKUP_BITS; + do { + sym2 = r->m_tables[0] + .m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; + } while (sym2 < 0); + } + counter = sym2; + bit_buf >>= code_len; + num_bits -= code_len; + if (counter & 256) break; + +#if !TINFL_USE_64BIT_BITBUF + if (num_bits < 15) { + bit_buf |= + (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 2; + num_bits += 16; + } +#endif + if ((sym2 = + r->m_tables[0] + .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= + 0) + code_len = sym2 >> 9; + else { + code_len = TINFL_FAST_LOOKUP_BITS; + do { + sym2 = r->m_tables[0] + .m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; + } while (sym2 < 0); + } + bit_buf >>= code_len; + num_bits -= code_len; + + pOut_buf_cur[0] = (mz_uint8)counter; + if (sym2 & 256) { + pOut_buf_cur++; + counter = sym2; + break; + } + pOut_buf_cur[1] = (mz_uint8)sym2; + pOut_buf_cur += 2; + } + } + if ((counter &= 511) == 256) break; + + num_extra = s_length_extra[counter - 257]; + counter = s_length_base[counter - 257]; + if (num_extra) { + mz_uint extra_bits; + TINFL_GET_BITS(25, extra_bits, num_extra); + counter += extra_bits; + } + + TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); + num_extra = s_dist_extra[dist]; + dist = s_dist_base[dist]; + if (num_extra) { + mz_uint extra_bits; + TINFL_GET_BITS(27, extra_bits, num_extra); + dist += extra_bits; + } + + dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; + if ((dist > dist_from_out_buf_start) && + (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) { + TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); + } + + pSrc = pOut_buf_start + + ((dist_from_out_buf_start - dist) & out_buf_size_mask); + + if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) { + while (counter--) { + while (pOut_buf_cur >= pOut_buf_end) { + TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = + pOut_buf_start[(dist_from_out_buf_start++ - dist) & + out_buf_size_mask]; + } + continue; + } +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + else if ((counter >= 9) && (counter <= dist)) { + const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + do { + ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; + ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; + pOut_buf_cur += 8; + } while ((pSrc += 8) < pSrc_end); + if ((counter &= 7) < 3) { + if (counter) { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + continue; + } + } +#endif + do { + pOut_buf_cur[0] = pSrc[0]; + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur[2] = pSrc[2]; + pOut_buf_cur += 3; + pSrc += 3; + } while ((int)(counter -= 3) > 2); + if ((int)counter > 0) { + pOut_buf_cur[0] = pSrc[0]; + if ((int)counter > 1) pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + } + } + } while (!(r->m_final & 1)); + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { + TINFL_SKIP_BITS(32, num_bits & 7); + for (counter = 0; counter < 4; ++counter) { + mz_uint s; + if (num_bits) + TINFL_GET_BITS(41, s, 8); + else + TINFL_GET_BYTE(42, s); + r->m_z_adler32 = (r->m_z_adler32 << 8) | s; + } + } + TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); + TINFL_CR_FINISH + +common_exit: + r->m_num_bits = num_bits; + r->m_bit_buf = bit_buf; + r->m_dist = dist; + r->m_counter = counter; + r->m_num_extra = num_extra; + r->m_dist_from_out_buf_start = dist_from_out_buf_start; + *pIn_buf_size = pIn_buf_cur - pIn_buf_next; + *pOut_buf_size = pOut_buf_cur - pOut_buf_next; + if ((decomp_flags & + (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && + (status >= 0)) { + const mz_uint8 *ptr = pOut_buf_next; + size_t buf_len = *pOut_buf_size; + mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, + s2 = r->m_check_adler32 >> 16; + size_t block_len = buf_len % 5552; + while (buf_len) { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { + s1 += ptr[0], s2 += s1; + s1 += ptr[1], s2 += s1; + s1 += ptr[2], s2 += s1; + s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; + s1 += ptr[5], s2 += s1; + s1 += ptr[6], s2 += s1; + s1 += ptr[7], s2 += s1; + } + for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; + buf_len -= block_len; + block_len = 5552; + } + r->m_check_adler32 = (s2 << 16) + s1; + if ((status == TINFL_STATUS_DONE) && + (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && + (r->m_check_adler32 != r->m_z_adler32)) + status = TINFL_STATUS_ADLER32_MISMATCH; + } + return status; +} + +// Higher level helper functions. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, + size_t *pOut_len, int flags) { + tinfl_decompressor decomp; + void *pBuf = NULL, *pNew_buf; + size_t src_buf_ofs = 0, out_buf_capacity = 0; + *pOut_len = 0; + tinfl_init(&decomp); + for (;;) { + size_t src_buf_size = src_buf_len - src_buf_ofs, + dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; + tinfl_status status = tinfl_decompress( + &decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size, + (mz_uint8 *)pBuf, pBuf ? (mz_uint8 *)pBuf + *pOut_len : NULL, + &dst_buf_size, + (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) { + MZ_FREE(pBuf); + *pOut_len = 0; + return NULL; + } + src_buf_ofs += src_buf_size; + *pOut_len += dst_buf_size; + if (status == TINFL_STATUS_DONE) break; + new_out_buf_capacity = out_buf_capacity * 2; + if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; + pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); + if (!pNew_buf) { + MZ_FREE(pBuf); + *pOut_len = 0; + return NULL; + } + pBuf = pNew_buf; + out_buf_capacity = new_out_buf_capacity; + } + return pBuf; +} + +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, + const void *pSrc_buf, size_t src_buf_len, + int flags) { + tinfl_decompressor decomp; + tinfl_status status; + tinfl_init(&decomp); + status = + tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len, + (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len, + (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + return (status != TINFL_STATUS_DONE) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED + : out_buf_len; +} + +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, + tinfl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags) { + int result = 0; + tinfl_decompressor decomp; + mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE); + size_t in_buf_ofs = 0, dict_ofs = 0; + if (!pDict) return TINFL_STATUS_FAILED; + tinfl_init(&decomp); + for (;;) { + size_t in_buf_size = *pIn_buf_size - in_buf_ofs, + dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; + tinfl_status status = + tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs, + &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, + (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); + in_buf_ofs += in_buf_size; + if ((dst_buf_size) && + (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) + break; + if (status != TINFL_STATUS_HAS_MORE_OUTPUT) { + result = (status == TINFL_STATUS_DONE); + break; + } + dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); + } + MZ_FREE(pDict); + *pIn_buf_size = in_buf_ofs; + return result; +} + +// ------------------- Low-level Compression (independent from all decompression +// API's) + +// Purposely making these tables static for faster init and thread safety. +static const mz_uint16 s_tdefl_len_sym[256] = { + 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, + 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, + 272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, + 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, + 276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, + 278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279, + 279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280, + 280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281, + 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, + 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282, + 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, + 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283, + 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, + 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 285}; + +static const mz_uint8 s_tdefl_len_extra[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0}; + +static const mz_uint8 s_tdefl_small_dist_sym[512] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, + 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17}; + +static const mz_uint8 s_tdefl_small_dist_extra[512] = { + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + +static const mz_uint8 s_tdefl_large_dist_sym[128] = { + 0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, + 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}; + +static const mz_uint8 s_tdefl_large_dist_extra[128] = { + 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13}; + +// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted +// values. +typedef struct { + mz_uint16 m_key, m_sym_index; +} tdefl_sym_freq; +static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms, + tdefl_sym_freq *pSyms0, + tdefl_sym_freq *pSyms1) { + mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; + tdefl_sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; + MZ_CLEAR_OBJ(hist); + for (i = 0; i < num_syms; i++) { + mz_uint freq = pSyms0[i].m_key; + hist[freq & 0xFF]++; + hist[256 + ((freq >> 8) & 0xFF)]++; + } + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) + total_passes--; + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) { + const mz_uint32 *pHist = &hist[pass << 8]; + mz_uint offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + } + for (i = 0; i < num_syms; i++) + pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = + pCur_syms[i]; + { + tdefl_sym_freq *t = pCur_syms; + pCur_syms = pNew_syms; + pNew_syms = t; + } + } + return pCur_syms; +} + +// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, +// alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. +static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) { + int root, leaf, next, avbl, used, dpth; + if (n == 0) + return; + else if (n == 1) { + A[0].m_key = 1; + return; + } + A[0].m_key += A[1].m_key; + root = 0; + leaf = 2; + for (next = 1; next < n - 1; next++) { + if (leaf >= n || A[root].m_key < A[leaf].m_key) { + A[next].m_key = A[root].m_key; + A[root++].m_key = (mz_uint16)next; + } else + A[next].m_key = A[leaf++].m_key; + if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) { + A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key); + A[root++].m_key = (mz_uint16)next; + } else + A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key); + } + A[n - 2].m_key = 0; + for (next = n - 3; next >= 0; next--) + A[next].m_key = A[A[next].m_key].m_key + 1; + avbl = 1; + used = dpth = 0; + root = n - 2; + next = n - 1; + while (avbl > 0) { + while (root >= 0 && (int)A[root].m_key == dpth) { + used++; + root--; + } + while (avbl > used) { + A[next--].m_key = (mz_uint16)(dpth); + avbl--; + } + avbl = 2 * used; + dpth++; + used = 0; + } +} + +// Limits canonical Huffman code table's max code size. +enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; +static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, + int code_list_len, + int max_code_size) { + int i; + mz_uint32 total = 0; + if (code_list_len <= 1) return; + for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) + pNum_codes[max_code_size] += pNum_codes[i]; + for (i = max_code_size; i > 0; i--) + total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); + while (total != (1UL << max_code_size)) { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) + if (pNum_codes[i]) { + pNum_codes[i]--; + pNum_codes[i + 1] += 2; + break; + } + total--; + } +} + +static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, + int table_len, int code_size_limit, + int static_table) { + int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; + mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; + MZ_CLEAR_OBJ(num_codes); + if (static_table) { + for (i = 0; i < table_len; i++) + num_codes[d->m_huff_code_sizes[table_num][i]]++; + } else { + tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], + *pSyms; + int num_used_syms = 0; + const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; + for (i = 0; i < table_len; i++) + if (pSym_count[i]) { + syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; + syms0[num_used_syms++].m_sym_index = (mz_uint16)i; + } + + pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); + tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); + + for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; + + tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, + code_size_limit); + + MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); + MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); + for (i = 1, j = num_used_syms; i <= code_size_limit; i++) + for (l = num_codes[i]; l > 0; l--) + d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); + } + + next_code[1] = 0; + for (j = 0, i = 2; i <= code_size_limit; i++) + next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (i = 0; i < table_len; i++) { + mz_uint rev_code = 0, code, code_size; + if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; + code = next_code[code_size]++; + for (l = code_size; l > 0; l--, code >>= 1) + rev_code = (rev_code << 1) | (code & 1); + d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; + } +} + +#define TDEFL_PUT_BITS(b, l) \ + do { \ + mz_uint bits = b; \ + mz_uint len = l; \ + MZ_ASSERT(bits <= ((1U << len) - 1U)); \ + d->m_bit_buffer |= (bits << d->m_bits_in); \ + d->m_bits_in += len; \ + while (d->m_bits_in >= 8) { \ + if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ + *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ + d->m_bit_buffer >>= 8; \ + d->m_bits_in -= 8; \ + } \ + } \ + MZ_MACRO_END + +#define TDEFL_RLE_PREV_CODE_SIZE() \ + { \ + if (rle_repeat_count) { \ + if (rle_repeat_count < 3) { \ + d->m_huff_count[2][prev_code_size] = (mz_uint16)( \ + d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ + while (rle_repeat_count--) \ + packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ + } else { \ + d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); \ + packed_code_sizes[num_packed_code_sizes++] = 16; \ + packed_code_sizes[num_packed_code_sizes++] = \ + (mz_uint8)(rle_repeat_count - 3); \ + } \ + rle_repeat_count = 0; \ + } \ + } + +#define TDEFL_RLE_ZERO_CODE_SIZE() \ + { \ + if (rle_z_count) { \ + if (rle_z_count < 3) { \ + d->m_huff_count[2][0] = \ + (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); \ + while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \ + } else if (rle_z_count <= 10) { \ + d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); \ + packed_code_sizes[num_packed_code_sizes++] = 17; \ + packed_code_sizes[num_packed_code_sizes++] = \ + (mz_uint8)(rle_z_count - 3); \ + } else { \ + d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); \ + packed_code_sizes[num_packed_code_sizes++] = 18; \ + packed_code_sizes[num_packed_code_sizes++] = \ + (mz_uint8)(rle_z_count - 11); \ + } \ + rle_z_count = 0; \ + } \ + } + +static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + +static void tdefl_start_dynamic_block(tdefl_compressor *d) { + int num_lit_codes, num_dist_codes, num_bit_lengths; + mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, + rle_repeat_count, packed_code_sizes_index; + mz_uint8 + code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], + packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], + prev_code_size = 0xFF; + + d->m_huff_count[0][256] = 1; + + tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); + tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); + + for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) + if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; + for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) + if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; + + memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); + memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], + num_dist_codes); + total_code_sizes_to_pack = num_lit_codes + num_dist_codes; + num_packed_code_sizes = 0; + rle_z_count = 0; + rle_repeat_count = 0; + + memset(&d->m_huff_count[2][0], 0, + sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); + for (i = 0; i < total_code_sizes_to_pack; i++) { + mz_uint8 code_size = code_sizes_to_pack[i]; + if (!code_size) { + TDEFL_RLE_PREV_CODE_SIZE(); + if (++rle_z_count == 138) { + TDEFL_RLE_ZERO_CODE_SIZE(); + } + } else { + TDEFL_RLE_ZERO_CODE_SIZE(); + if (code_size != prev_code_size) { + TDEFL_RLE_PREV_CODE_SIZE(); + d->m_huff_count[2][code_size] = + (mz_uint16)(d->m_huff_count[2][code_size] + 1); + packed_code_sizes[num_packed_code_sizes++] = code_size; + } else if (++rle_repeat_count == 6) { + TDEFL_RLE_PREV_CODE_SIZE(); + } + } + prev_code_size = code_size; + } + if (rle_repeat_count) { + TDEFL_RLE_PREV_CODE_SIZE(); + } else { + TDEFL_RLE_ZERO_CODE_SIZE(); + } + + tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); + + TDEFL_PUT_BITS(2, 2); + + TDEFL_PUT_BITS(num_lit_codes - 257, 5); + TDEFL_PUT_BITS(num_dist_codes - 1, 5); + + for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) + if (d->m_huff_code_sizes + [2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) + break; + num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); + TDEFL_PUT_BITS(num_bit_lengths - 4, 4); + for (i = 0; (int)i < num_bit_lengths; i++) + TDEFL_PUT_BITS( + d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); + + for (packed_code_sizes_index = 0; + packed_code_sizes_index < num_packed_code_sizes;) { + mz_uint code = packed_code_sizes[packed_code_sizes_index++]; + MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); + TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); + if (code >= 16) + TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], + "\02\03\07"[code - 16]); + } +} + +static void tdefl_start_static_block(tdefl_compressor *d) { + mz_uint i; + mz_uint8 *p = &d->m_huff_code_sizes[0][0]; + + for (i = 0; i <= 143; ++i) *p++ = 8; + for (; i <= 255; ++i) *p++ = 9; + for (; i <= 279; ++i) *p++ = 7; + for (; i <= 287; ++i) *p++ = 8; + + memset(d->m_huff_code_sizes[1], 5, 32); + + tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); + tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); + + TDEFL_PUT_BITS(1, 2); +} + +static const mz_uint mz_bitmasks[17] = { + 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, + 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF}; + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && \ + MINIZ_HAS_64BIT_REGISTERS +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { + mz_uint flags; + mz_uint8 *pLZ_codes; + mz_uint8 *pOutput_buf = d->m_pOutput_buf; + mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; + mz_uint64 bit_buffer = d->m_bit_buffer; + mz_uint bits_in = d->m_bits_in; + +#define TDEFL_PUT_BITS_FAST(b, l) \ + { \ + bit_buffer |= (((mz_uint64)(b)) << bits_in); \ + bits_in += (l); \ + } + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; + flags >>= 1) { + if (flags == 1) flags = *pLZ_codes++ | 0x100; + + if (flags & 1) { + mz_uint s0, s1, n0, n1, sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], + match_dist = *(const mz_uint16 *)(pLZ_codes + 1); + pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], + d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], + s_tdefl_len_extra[match_len]); + + // This sequence coaxes MSVC into using cmov's vs. jmp's. + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + n0 = s_tdefl_small_dist_extra[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[match_dist >> 8]; + n1 = s_tdefl_large_dist_extra[match_dist >> 8]; + sym = (match_dist < 512) ? s0 : s1; + num_extra_bits = (match_dist < 512) ? n0 : n1; + + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], + d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], + num_extra_bits); + } else { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], + d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], + d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], + d->m_huff_code_sizes[0][lit]); + } + } + } + + if (pOutput_buf >= d->m_pOutput_buf_end) return MZ_FALSE; + + *(mz_uint64 *)pOutput_buf = bit_buffer; + pOutput_buf += (bits_in >> 3); + bit_buffer >>= (bits_in & ~7); + bits_in &= 7; + } + +#undef TDEFL_PUT_BITS_FAST + + d->m_pOutput_buf = pOutput_buf; + d->m_bits_in = 0; + d->m_bit_buffer = 0; + + while (bits_in) { + mz_uint32 n = MZ_MIN(bits_in, 16); + TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); + bit_buffer >>= n; + bits_in -= n; + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#else +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { + mz_uint flags; + mz_uint8 *pLZ_codes; + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; + flags >>= 1) { + if (flags == 1) flags = *pLZ_codes++ | 0x100; + if (flags & 1) { + mz_uint sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], + match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); + pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], + d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], + s_tdefl_len_extra[match_len]); + + if (match_dist < 512) { + sym = s_tdefl_small_dist_sym[match_dist]; + num_extra_bits = s_tdefl_small_dist_extra[match_dist]; + } else { + sym = s_tdefl_large_dist_sym[match_dist >> 8]; + num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; + } + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } else { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && + // MINIZ_HAS_64BIT_REGISTERS + +static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) { + if (static_block) + tdefl_start_static_block(d); + else + tdefl_start_dynamic_block(d); + return tdefl_compress_lz_codes(d); +} + +static int tdefl_flush_block(tdefl_compressor *d, int flush) { + mz_uint saved_bit_buf, saved_bits_in; + mz_uint8 *pSaved_output_buf; + mz_bool comp_block_succeeded = MZ_FALSE; + int n, use_raw_block = + ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && + (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; + mz_uint8 *pOutput_buf_start = + ((d->m_pPut_buf_func == NULL) && + ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) + ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) + : d->m_output_buf; + + d->m_pOutput_buf = pOutput_buf_start; + d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; + + MZ_ASSERT(!d->m_output_flush_remaining); + d->m_output_flush_ofs = 0; + d->m_output_flush_remaining = 0; + + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); + d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); + + if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) { + TDEFL_PUT_BITS(0x78, 8); + TDEFL_PUT_BITS(0x01, 8); + } + + TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); + + pSaved_output_buf = d->m_pOutput_buf; + saved_bit_buf = d->m_bit_buffer; + saved_bits_in = d->m_bits_in; + + if (!use_raw_block) + comp_block_succeeded = + tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || + (d->m_total_lz_bytes < 48)); + + // If the block gets expanded, forget the current contents of the output + // buffer and send a raw block instead. + if (((use_raw_block) || + ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= + d->m_total_lz_bytes))) && + ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) { + mz_uint i; + d->m_pOutput_buf = pSaved_output_buf; + d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + TDEFL_PUT_BITS(0, 2); + if (d->m_bits_in) { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) { + TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); + } + for (i = 0; i < d->m_total_lz_bytes; ++i) { + TDEFL_PUT_BITS( + d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], + 8); + } + } + // Check for the extremely unlikely (if not impossible) case of the compressed + // block not fitting into the output buffer when using dynamic codes. + else if (!comp_block_succeeded) { + d->m_pOutput_buf = pSaved_output_buf; + d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + tdefl_compress_block(d, MZ_TRUE); + } + + if (flush) { + if (flush == TDEFL_FINISH) { + if (d->m_bits_in) { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { + mz_uint i, a = d->m_adler32; + for (i = 0; i < 4; i++) { + TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); + a <<= 8; + } + } + } else { + mz_uint i, z = 0; + TDEFL_PUT_BITS(0, 3); + if (d->m_bits_in) { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + for (i = 2; i; --i, z ^= 0xFFFF) { + TDEFL_PUT_BITS(z & 0xFFFF, 16); + } + } + } + + MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); + + memset(&d->m_huff_count[0][0], 0, + sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, + sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; + d->m_pLZ_flags = d->m_lz_code_buf; + d->m_num_flags_left = 8; + d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; + d->m_total_lz_bytes = 0; + d->m_block_index++; + + if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) { + if (d->m_pPut_buf_func) { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) + return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); + } else if (pOutput_buf_start == d->m_output_buf) { + int bytes_to_copy = (int)MZ_MIN( + (size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, + bytes_to_copy); + d->m_out_buf_ofs += bytes_to_copy; + if ((n -= bytes_to_copy) != 0) { + d->m_output_flush_ofs = bytes_to_copy; + d->m_output_flush_remaining = n; + } + } else { + d->m_out_buf_ofs += n; + } + } + + return d->m_output_flush_remaining; +} + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16 *)(p) +static MZ_FORCEINLINE void tdefl_find_match( + tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, + mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, + match_len = *pMatch_len, probe_pos = pos, next_probe_pos, + probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q; + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), + s01 = TDEFL_READ_UNALIGNED_WORD(s); + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); + if (max_match_len <= match_len) return; + for (;;) { + for (;;) { + if (--num_probes_left == 0) return; +#define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || \ + ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ + return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \ + break; + TDEFL_PROBE; + TDEFL_PROBE; + TDEFL_PROBE; + } + if (!dist) break; + q = (const mz_uint16 *)(d->m_dict + probe_pos); + if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; + p = s; + probe_len = 32; + do { + } while ( + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (--probe_len > 0)); + if (!probe_len) { + *pMatch_dist = dist; + *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); + break; + } else if ((probe_len = ((mz_uint)(p - s) * 2) + + (mz_uint)(*(const mz_uint8 *)p == + *(const mz_uint8 *)q)) > match_len) { + *pMatch_dist = dist; + if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == + max_match_len) + break; + c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); + } + } +} +#else +static MZ_FORCEINLINE void tdefl_find_match( + tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, + mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, + match_len = *pMatch_len, probe_pos = pos, next_probe_pos, + probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); + if (max_match_len <= match_len) return; + for (;;) { + for (;;) { + if (--num_probes_left == 0) return; +#define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || \ + ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ + return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if ((d->m_dict[probe_pos + match_len] == c0) && \ + (d->m_dict[probe_pos + match_len - 1] == c1)) \ + break; + TDEFL_PROBE; + TDEFL_PROBE; + TDEFL_PROBE; + } + if (!dist) break; + p = s; + q = d->m_dict + probe_pos; + for (probe_len = 0; probe_len < max_match_len; probe_len++) + if (*p++ != *q++) break; + if (probe_len > match_len) { + *pMatch_dist = dist; + if ((*pMatch_len = match_len = probe_len) == max_match_len) return; + c0 = d->m_dict[pos + match_len]; + c1 = d->m_dict[pos + match_len - 1]; + } + } +} +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +static mz_bool tdefl_compress_fast(tdefl_compressor *d) { + // Faster, minimally featured LZRW1-style match+parse loop with better + // register utilization. Intended for applications where raw throughput is + // valued more highly than ratio. + mz_uint lookahead_pos = d->m_lookahead_pos, + lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, + total_lz_bytes = d->m_total_lz_bytes, + num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = + (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( + d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, + MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) + break; + + while (lookahead_size >= 4) { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; + mz_uint hash = + (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & + TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= + dict_size) && + ((*(const mz_uint32 *)(d->m_dict + + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & + 0xFFFFFF) == first_trigram)) { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do { + } while ((TDEFL_READ_UNALIGNED_WORD(++p) == + TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == + TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == + TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == + TDEFL_READ_UNALIGNED_WORD(++q)) && + (--probe_len > 0)); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || + ((cur_match_len == TDEFL_MIN_MATCH_LEN) && + (cur_match_dist >= 8U * 1024U))) { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } else { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && + (cur_match_dist >= 1) && + (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - + TDEFL_MIN_MATCH_LEN]]++; + } + } else { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) { + num_flags_left = 8; + pLZ_flags = pLZ_code_buf++; + } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { + int n; + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; + pLZ_code_buf = d->m_pLZ_code_buf; + pLZ_flags = d->m_pLZ_flags; + num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) { + num_flags_left = 8; + pLZ_flags = pLZ_code_buf++; + } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { + int n; + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; + pLZ_code_buf = d->m_pLZ_code_buf; + pLZ_flags = d->m_pLZ_flags; + num_flags_left = d->m_num_flags_left; + } + } + } + + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + return MZ_TRUE; +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + +static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, + mz_uint8 lit) { + d->m_total_lz_bytes++; + *d->m_pLZ_code_buf++ = lit; + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); + if (--d->m_num_flags_left == 0) { + d->m_num_flags_left = 8; + d->m_pLZ_flags = d->m_pLZ_code_buf++; + } + d->m_huff_count[0][lit]++; +} + +static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, + mz_uint match_len, + mz_uint match_dist) { + mz_uint32 s0, s1; + + MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && + (match_dist <= TDEFL_LZ_DICT_SIZE)); + + d->m_total_lz_bytes += match_len; + + d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); + + match_dist -= 1; + d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); + d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); + d->m_pLZ_code_buf += 3; + + *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); + if (--d->m_num_flags_left == 0) { + d->m_num_flags_left = 8; + d->m_pLZ_flags = d->m_pLZ_code_buf++; + } + + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; + d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; + + if (match_len >= TDEFL_MIN_MATCH_LEN) + d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; +} + +static mz_bool tdefl_compress_normal(tdefl_compressor *d) { + const mz_uint8 *pSrc = d->m_pSrc; + size_t src_buf_left = d->m_src_buf_left; + tdefl_flush flush = d->m_flush; + + while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) { + mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; + // Update dictionary and hash chains. Keeps the lookahead size equal to + // TDEFL_MAX_MATCH_LEN. + if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) { + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & + TDEFL_LZ_DICT_SIZE_MASK, + ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; + mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] + << TDEFL_LZ_HASH_SHIFT) ^ + d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( + src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); + const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; + src_buf_left -= num_bytes_to_process; + d->m_lookahead_size += num_bytes_to_process; + while (pSrc != pSrc_end) { + mz_uint8 c = *pSrc++; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)(ins_pos); + dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + ins_pos++; + } + } else { + while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) { + mz_uint8 c = *pSrc++; + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & + TDEFL_LZ_DICT_SIZE_MASK; + src_buf_left--; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) { + mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; + mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] + << (TDEFL_LZ_HASH_SHIFT * 2)) ^ + (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] + << TDEFL_LZ_HASH_SHIFT) ^ + c) & + (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)(ins_pos); + } + } + } + d->m_dict_size = + MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); + if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) break; + + // Simple lazy/greedy parsing state machine. + len_to_move = 1; + cur_match_dist = 0; + cur_match_len = + d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); + cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) { + if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) { + mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; + cur_match_len = 0; + while (cur_match_len < d->m_lookahead_size) { + if (d->m_dict[cur_pos + cur_match_len] != c) break; + cur_match_len++; + } + if (cur_match_len < TDEFL_MIN_MATCH_LEN) + cur_match_len = 0; + else + cur_match_dist = 1; + } + } else { + tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, + d->m_lookahead_size, &cur_match_dist, &cur_match_len); + } + if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && + (cur_match_dist >= 8U * 1024U)) || + (cur_pos == cur_match_dist) || + ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) { + cur_match_dist = cur_match_len = 0; + } + if (d->m_saved_match_len) { + if (cur_match_len > d->m_saved_match_len) { + tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); + if (cur_match_len >= 128) { + tdefl_record_match(d, cur_match_len, cur_match_dist); + d->m_saved_match_len = 0; + len_to_move = cur_match_len; + } else { + d->m_saved_lit = d->m_dict[cur_pos]; + d->m_saved_match_dist = cur_match_dist; + d->m_saved_match_len = cur_match_len; + } + } else { + tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); + len_to_move = d->m_saved_match_len - 1; + d->m_saved_match_len = 0; + } + } else if (!cur_match_dist) + tdefl_record_literal(d, + d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); + else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || + (cur_match_len >= 128)) { + tdefl_record_match(d, cur_match_len, cur_match_dist); + len_to_move = cur_match_len; + } else { + d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; + d->m_saved_match_dist = cur_match_dist; + d->m_saved_match_len = cur_match_len; + } + // Move the lookahead forward by len_to_move bytes. + d->m_lookahead_pos += len_to_move; + MZ_ASSERT(d->m_lookahead_size >= len_to_move); + d->m_lookahead_size -= len_to_move; + d->m_dict_size = + MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE); + // Check if it's time to flush the current LZ codes to the internal output + // buffer. + if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || + ((d->m_total_lz_bytes > 31 * 1024) && + (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= + d->m_total_lz_bytes) || + (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) { + int n; + d->m_pSrc = pSrc; + d->m_src_buf_left = src_buf_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + } + } + + d->m_pSrc = pSrc; + d->m_src_buf_left = src_buf_left; + return MZ_TRUE; +} + +static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) { + if (d->m_pIn_buf_size) { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + } + + if (d->m_pOut_buf_size) { + size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, + d->m_output_flush_remaining); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, + d->m_output_buf + d->m_output_flush_ofs, n); + d->m_output_flush_ofs += (mz_uint)n; + d->m_output_flush_remaining -= (mz_uint)n; + d->m_out_buf_ofs += n; + + *d->m_pOut_buf_size = d->m_out_buf_ofs; + } + + return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE + : TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, + size_t *pIn_buf_size, void *pOut_buf, + size_t *pOut_buf_size, tdefl_flush flush) { + if (!d) { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return TDEFL_STATUS_BAD_PARAM; + } + + d->m_pIn_buf = pIn_buf; + d->m_pIn_buf_size = pIn_buf_size; + d->m_pOut_buf = pOut_buf; + d->m_pOut_buf_size = pOut_buf_size; + d->m_pSrc = (const mz_uint8 *)(pIn_buf); + d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0; + d->m_out_buf_ofs = 0; + d->m_flush = flush; + + if (((d->m_pPut_buf_func != NULL) == + ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || + (d->m_prev_return_status != TDEFL_STATUS_OKAY) || + (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || + (pIn_buf_size && *pIn_buf_size && !pIn_buf) || + (pOut_buf_size && *pOut_buf_size && !pOut_buf)) { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); + } + d->m_wants_to_finish |= (flush == TDEFL_FINISH); + + if ((d->m_output_flush_remaining) || (d->m_finished)) + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && + ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && + ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | + TDEFL_RLE_MATCHES)) == 0)) { + if (!tdefl_compress_fast(d)) return d->m_prev_return_status; + } else +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + { + if (!tdefl_compress_normal(d)) return d->m_prev_return_status; + } + + if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && + (pIn_buf)) + d->m_adler32 = + (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, + d->m_pSrc - (const mz_uint8 *)pIn_buf); + + if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && + (!d->m_output_flush_remaining)) { + if (tdefl_flush_block(d, flush) < 0) return d->m_prev_return_status; + d->m_finished = (flush == TDEFL_FINISH); + if (flush == TDEFL_FULL_FLUSH) { + MZ_CLEAR_OBJ(d->m_hash); + MZ_CLEAR_OBJ(d->m_next); + d->m_dict_size = 0; + } + } + + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); +} + +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, + size_t in_buf_size, tdefl_flush flush) { + MZ_ASSERT(d->m_pPut_buf_func); + return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); +} + +tdefl_status tdefl_init(tdefl_compressor *d, + tdefl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags) { + d->m_pPut_buf_func = pPut_buf_func; + d->m_pPut_buf_user = pPut_buf_user; + d->m_flags = (mz_uint)(flags); + d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; + d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; + d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); + d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = + d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; + d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = + d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; + d->m_pLZ_flags = d->m_lz_code_buf; + d->m_num_flags_left = 8; + d->m_pOutput_buf = d->m_output_buf; + d->m_pOutput_buf_end = d->m_output_buf; + d->m_prev_return_status = TDEFL_STATUS_OKAY; + d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; + d->m_adler32 = 1; + d->m_pIn_buf = NULL; + d->m_pOut_buf = NULL; + d->m_pIn_buf_size = NULL; + d->m_pOut_buf_size = NULL; + d->m_flush = TDEFL_NO_FLUSH; + d->m_pSrc = NULL; + d->m_src_buf_left = 0; + d->m_out_buf_ofs = 0; + memset(&d->m_huff_count[0][0], 0, + sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, + sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + return TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) { + return d->m_prev_return_status; +} + +mz_uint32 tdefl_get_adler32(tdefl_compressor *d) { return d->m_adler32; } + +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, + tdefl_put_buf_func_ptr pPut_buf_func, + void *pPut_buf_user, int flags) { + tdefl_compressor *pComp; + mz_bool succeeded; + if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; + pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); + if (!pComp) return MZ_FALSE; + succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == + TDEFL_STATUS_OKAY); + succeeded = + succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == + TDEFL_STATUS_DONE); + MZ_FREE(pComp); + return succeeded; +} + +typedef struct { + size_t m_size, m_capacity; + mz_uint8 *m_pBuf; + mz_bool m_expandable; +} tdefl_output_buffer; + +static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, + void *pUser) { + tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; + size_t new_size = p->m_size + len; + if (new_size > p->m_capacity) { + size_t new_capacity = p->m_capacity; + mz_uint8 *pNew_buf; + if (!p->m_expandable) return MZ_FALSE; + do { + new_capacity = MZ_MAX(128U, new_capacity << 1U); + } while (new_size > new_capacity); + pNew_buf = (mz_uint8 *)MZ_REALLOC(p->m_pBuf, new_capacity); + if (!pNew_buf) return MZ_FALSE; + p->m_pBuf = pNew_buf; + p->m_capacity = new_capacity; + } + memcpy((mz_uint8 *)p->m_pBuf + p->m_size, pBuf, len); + p->m_size = new_size; + return MZ_TRUE; +} + +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, + size_t *pOut_len, int flags) { + tdefl_output_buffer out_buf; + MZ_CLEAR_OBJ(out_buf); + if (!pOut_len) + return MZ_FALSE; + else + *pOut_len = 0; + out_buf.m_expandable = MZ_TRUE; + if (!tdefl_compress_mem_to_output( + pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) + return NULL; + *pOut_len = out_buf.m_size; + return out_buf.m_pBuf; +} + +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, + const void *pSrc_buf, size_t src_buf_len, + int flags) { + tdefl_output_buffer out_buf; + MZ_CLEAR_OBJ(out_buf); + if (!pOut_buf) return 0; + out_buf.m_pBuf = (mz_uint8 *)pOut_buf; + out_buf.m_capacity = out_buf_len; + if (!tdefl_compress_mem_to_output( + pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) + return 0; + return out_buf.m_size; +} + +#ifndef MINIZ_NO_ZLIB_APIS +static const mz_uint s_tdefl_num_probes[11] = {0, 1, 6, 32, 16, 32, + 128, 256, 512, 768, 1500}; + +// level may actually range from [0,10] (10 is a "hidden" max level, where we +// want a bit more compression and it's fine if throughput to fall off a cliff +// on some files). +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, + int strategy) { + mz_uint comp_flags = + s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | + ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); + if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + + if (!level) + comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + else if (strategy == MZ_FILTERED) + comp_flags |= TDEFL_FILTER_MATCHES; + else if (strategy == MZ_HUFFMAN_ONLY) + comp_flags &= ~TDEFL_MAX_PROBES_MASK; + else if (strategy == MZ_FIXED) + comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + else if (strategy == MZ_RLE) + comp_flags |= TDEFL_RLE_MATCHES; + + return comp_flags; +} +#endif // MINIZ_NO_ZLIB_APIS + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4204) // nonstandard extension used : non-constant + // aggregate initializer (also supported by GNU + // C and C99, so no big deal) +#pragma warning(disable : 4244) // 'initializing': conversion from '__int64' to + // 'int', possible loss of data +#pragma warning(disable : 4267) // 'argument': conversion from '__int64' to + // 'int', possible loss of data +#pragma warning(disable : 4996) // 'strdup': The POSIX name for this item is + // deprecated. Instead, use the ISO C and C++ + // conformant name: _strdup. +#endif + +// Simple PNG writer function by Alex Evans, 2011. Released into the public +// domain: https://gist.github.com/908299, more context at +// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. +// This is actually a modification of Alex's original code so PNG files +// generated by this function pass pngcheck. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, + int h, int num_chans, + size_t *pLen_out, + mz_uint level, mz_bool flip) { + // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was + // defined. + static const mz_uint s_tdefl_png_num_probes[11] = { + 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500}; + tdefl_compressor *pComp = + (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); + tdefl_output_buffer out_buf; + int i, bpl = w * num_chans, y, z; + mz_uint32 c; + *pLen_out = 0; + if (!pComp) return NULL; + MZ_CLEAR_OBJ(out_buf); + out_buf.m_expandable = MZ_TRUE; + out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h); + if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity))) { + MZ_FREE(pComp); + return NULL; + } + // write dummy header + for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); + // compress image data + tdefl_init( + pComp, tdefl_output_buffer_putter, &out_buf, + s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); + for (y = 0; y < h; ++y) { + tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); + tdefl_compress_buffer(pComp, + (mz_uint8 *)pImage + (flip ? (h - 1 - y) : y) * bpl, + bpl, TDEFL_NO_FLUSH); + } + if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != + TDEFL_STATUS_DONE) { + MZ_FREE(pComp); + MZ_FREE(out_buf.m_pBuf); + return NULL; + } + // write real header + *pLen_out = out_buf.m_size - 41; + { + static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; + mz_uint8 pnghdr[41] = {0x89, + 0x50, + 0x4e, + 0x47, + 0x0d, + 0x0a, + 0x1a, + 0x0a, + 0x00, + 0x00, + 0x00, + 0x0d, + 0x49, + 0x48, + 0x44, + 0x52, + 0, + 0, + (mz_uint8)(w >> 8), + (mz_uint8)w, + 0, + 0, + (mz_uint8)(h >> 8), + (mz_uint8)h, + 8, + chans[num_chans], + 0, + 0, + 0, + 0, + 0, + 0, + 0, + (mz_uint8)(*pLen_out >> 24), + (mz_uint8)(*pLen_out >> 16), + (mz_uint8)(*pLen_out >> 8), + (mz_uint8)*pLen_out, + 0x49, + 0x44, + 0x41, + 0x54}; + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17); + for (i = 0; i < 4; ++i, c <<= 8) + ((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24); + memcpy(out_buf.m_pBuf, pnghdr, 41); + } + // write footer (IDAT CRC-32, followed by IEND chunk) + if (!tdefl_output_buffer_putter( + "\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { + *pLen_out = 0; + MZ_FREE(pComp); + MZ_FREE(out_buf.m_pBuf); + return NULL; + } + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, + *pLen_out + 4); + for (i = 0; i < 4; ++i, c <<= 8) + (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24); + // compute final size of file, grab compressed data buffer and return + *pLen_out += 57; + MZ_FREE(pComp); + return out_buf.m_pBuf; +} +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, + int num_chans, size_t *pLen_out) { + // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we + // can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's + // where #defined out) + return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, + pLen_out, 6, MZ_FALSE); +} + +// ------------------- .ZIP archive reading + +#ifndef MINIZ_NO_ARCHIVE_APIS +#error "No arvhive APIs" + +#ifdef MINIZ_NO_STDIO +#define MZ_FILE void * +#else +#include +#include + +#if defined(_MSC_VER) || defined(__MINGW64__) +static FILE *mz_fopen(const char *pFilename, const char *pMode) { + FILE *pFile = NULL; + fopen_s(&pFile, pFilename, pMode); + return pFile; +} +static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) { + FILE *pFile = NULL; + if (freopen_s(&pFile, pPath, pMode, pStream)) return NULL; + return pFile; +} +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FILE FILE +#define MZ_FOPEN mz_fopen +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 _ftelli64 +#define MZ_FSEEK64 _fseeki64 +#define MZ_FILE_STAT_STRUCT _stat +#define MZ_FILE_STAT _stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN mz_freopen +#define MZ_DELETE_FILE remove +#elif defined(__MINGW32__) +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello64 +#define MZ_FSEEK64 fseeko64 +#define MZ_FILE_STAT_STRUCT _stat +#define MZ_FILE_STAT _stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) +#define MZ_DELETE_FILE remove +#elif defined(__TINYC__) +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftell +#define MZ_FSEEK64 fseek +#define MZ_FILE_STAT_STRUCT stat +#define MZ_FILE_STAT stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) +#define MZ_DELETE_FILE remove +#elif defined(__GNUC__) && defined(_LARGEFILE64_SOURCE) && _LARGEFILE64_SOURCE +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen64(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello64 +#define MZ_FSEEK64 fseeko64 +#define MZ_FILE_STAT_STRUCT stat64 +#define MZ_FILE_STAT stat64 +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(p, m, s) freopen64(p, m, s) +#define MZ_DELETE_FILE remove +#else +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FILE FILE +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello +#define MZ_FSEEK64 fseeko +#define MZ_FILE_STAT_STRUCT stat +#define MZ_FILE_STAT stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) +#define MZ_DELETE_FILE remove +#endif // #ifdef _MSC_VER +#endif // #ifdef MINIZ_NO_STDIO + +#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) + +// Various ZIP archive enums. To completely avoid cross platform compiler +// alignment and platform endian issues, miniz.c doesn't use structs for any of +// this stuff. +enum { + // ZIP archive identifiers and record sizes + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, + MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, + MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22, + // Central directory header record offsets + MZ_ZIP_CDH_SIG_OFS = 0, + MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, + MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, + MZ_ZIP_CDH_BIT_FLAG_OFS = 8, + MZ_ZIP_CDH_METHOD_OFS = 10, + MZ_ZIP_CDH_FILE_TIME_OFS = 12, + MZ_ZIP_CDH_FILE_DATE_OFS = 14, + MZ_ZIP_CDH_CRC32_OFS = 16, + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, + MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, + MZ_ZIP_CDH_EXTRA_LEN_OFS = 30, + MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, + MZ_ZIP_CDH_DISK_START_OFS = 34, + MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, + MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42, + // Local directory header offsets + MZ_ZIP_LDH_SIG_OFS = 0, + MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, + MZ_ZIP_LDH_BIT_FLAG_OFS = 6, + MZ_ZIP_LDH_METHOD_OFS = 8, + MZ_ZIP_LDH_FILE_TIME_OFS = 10, + MZ_ZIP_LDH_FILE_DATE_OFS = 12, + MZ_ZIP_LDH_CRC32_OFS = 14, + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22, + MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, + MZ_ZIP_LDH_EXTRA_LEN_OFS = 28, + // End of central directory offsets + MZ_ZIP_ECDH_SIG_OFS = 0, + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8, + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, + MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, + MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, + MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20, +}; + +typedef struct { + void *m_p; + size_t m_size, m_capacity; + mz_uint m_element_size; +} mz_zip_array; + +struct mz_zip_internal_state_tag { + mz_zip_array m_central_dir; + mz_zip_array m_central_dir_offsets; + mz_zip_array m_sorted_central_dir_offsets; + MZ_FILE *m_pFile; + void *m_pMem; + size_t m_mem_size; + size_t m_mem_capacity; +}; + +#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) \ + (array_ptr)->m_element_size = element_size +#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) \ + ((element_type *)((array_ptr)->m_p))[index] + +static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, + mz_zip_array *pArray) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); + memset(pArray, 0, sizeof(mz_zip_array)); +} + +static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, + mz_zip_array *pArray, + size_t min_new_capacity, + mz_uint growing) { + void *pNew_p; + size_t new_capacity = min_new_capacity; + MZ_ASSERT(pArray->m_element_size); + if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; + if (growing) { + new_capacity = MZ_MAX(1, pArray->m_capacity); + while (new_capacity < min_new_capacity) new_capacity *= 2; + } + if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, + pArray->m_element_size, new_capacity))) + return MZ_FALSE; + pArray->m_p = pNew_p; + pArray->m_capacity = new_capacity; + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, + mz_zip_array *pArray, + size_t new_capacity, + mz_uint growing) { + if (new_capacity > pArray->m_capacity) { + if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) + return MZ_FALSE; + } + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, + mz_zip_array *pArray, + size_t new_size, + mz_uint growing) { + if (new_size > pArray->m_capacity) { + if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) + return MZ_FALSE; + } + pArray->m_size = new_size; + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, + mz_zip_array *pArray, + size_t n) { + return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip, + mz_zip_array *pArray, + const void *pElements, + size_t n) { + size_t orig_size = pArray->m_size; + if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) + return MZ_FALSE; + memcpy((mz_uint8 *)pArray->m_p + orig_size * pArray->m_element_size, + pElements, n * pArray->m_element_size); + return MZ_TRUE; +} + +#ifndef MINIZ_NO_TIME +static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date) { + struct tm tm; + memset(&tm, 0, sizeof(tm)); + tm.tm_isdst = -1; + tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; + tm.tm_mon = ((dos_date >> 5) & 15) - 1; + tm.tm_mday = dos_date & 31; + tm.tm_hour = (dos_time >> 11) & 31; + tm.tm_min = (dos_time >> 5) & 63; + tm.tm_sec = (dos_time << 1) & 62; + return mktime(&tm); +} + +static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time, + mz_uint16 *pDOS_date) { +#ifdef _MSC_VER + struct tm tm_struct; + struct tm *tm = &tm_struct; + errno_t err = localtime_s(tm, &time); + if (err) { + *pDOS_date = 0; + *pDOS_time = 0; + return; + } +#else + struct tm *tm = localtime(&time); +#endif + *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + + ((tm->tm_sec) >> 1)); + *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + + ((tm->tm_mon + 1) << 5) + tm->tm_mday); +} +#endif + +#ifndef MINIZ_NO_STDIO +static mz_bool mz_zip_get_file_modified_time(const char *pFilename, + mz_uint16 *pDOS_time, + mz_uint16 *pDOS_date) { +#ifdef MINIZ_NO_TIME + (void)pFilename; + *pDOS_date = *pDOS_time = 0; +#else + struct MZ_FILE_STAT_STRUCT file_stat; + // On Linux with x86 glibc, this call will fail on large files (>= 0x80000000 + // bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. + if (MZ_FILE_STAT(pFilename, &file_stat) != 0) return MZ_FALSE; + mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date); +#endif // #ifdef MINIZ_NO_TIME + return MZ_TRUE; +} + +#ifndef MINIZ_NO_TIME +static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time, + time_t modified_time) { + struct utimbuf t; + t.actime = access_time; + t.modtime = modified_time; + return !utime(pFilename, &t); +} +#endif // #ifndef MINIZ_NO_TIME +#endif // #ifndef MINIZ_NO_STDIO + +static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, + mz_uint32 flags) { + (void)flags; + if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return MZ_FALSE; + + if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; + if (!pZip->m_pFree) pZip->m_pFree = def_free_func; + if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; + + pZip->m_zip_mode = MZ_ZIP_MODE_READING; + pZip->m_archive_size = 0; + pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return MZ_FALSE; + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, + sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, + sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, + sizeof(mz_uint32)); + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool +mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, + const mz_zip_array *pCentral_dir_offsets, + mz_uint l_index, mz_uint r_index) { + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( + pCentral_dir_array, mz_uint8, + MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, + l_index)), + *pE; + const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT( + pCentral_dir_array, mz_uint8, + MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index)); + mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), + r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break; + pL++; + pR++; + } + return (pL == pE) ? (l_len < r_len) : (l < r); +} + +#define MZ_SWAP_UINT32(a, b) \ + do { \ + mz_uint32 t = a; \ + a = b; \ + b = t; \ + } \ + MZ_MACRO_END + +// Heap sort of lowercased filenames, used to help accelerate plain central +// directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), +// but it could allocate memory.) +static void mz_zip_reader_sort_central_dir_offsets_by_filename( + mz_zip_archive *pZip) { + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT( + &pState->m_sorted_central_dir_offsets, mz_uint32, 0); + const int size = pZip->m_total_files; + int start = (size - 2) >> 1, end; + while (start >= 0) { + int child, root = start; + for (;;) { + if ((child = (root << 1) + 1) >= size) break; + child += + (((child + 1) < size) && + (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, + pIndices[child], pIndices[child + 1]))); + if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, + pIndices[root], pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); + root = child; + } + start--; + } + + end = size - 1; + while (end > 0) { + int child, root = 0; + MZ_SWAP_UINT32(pIndices[end], pIndices[0]); + for (;;) { + if ((child = (root << 1) + 1) >= end) break; + child += + (((child + 1) < end) && + mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, + pIndices[child], pIndices[child + 1])); + if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, + pIndices[root], pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); + root = child; + } + end--; + } +} + +static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, + mz_uint32 flags) { + mz_uint cdir_size, num_this_disk, cdir_disk_index; + mz_uint64 cdir_ofs; + mz_int64 cur_file_ofs; + const mz_uint8 *p; + mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; + mz_uint8 *pBuf = (mz_uint8 *)buf_u32; + mz_bool sort_central_dir = + ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0); + // Basic sanity checks - reject files which are too small, and check the first + // 4 bytes of the file to make sure a local header is there. + if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + // Find the end of central directory record by scanning the file from the end + // towards the beginning. + cur_file_ofs = + MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); + for (;;) { + int i, + n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) + return MZ_FALSE; + for (i = n - 4; i >= 0; --i) + if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) break; + if (i >= 0) { + cur_file_ofs += i; + break; + } + if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= + (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) + return MZ_FALSE; + cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); + } + // Read and verify the end of central directory record. + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) || + ((pZip->m_total_files = + MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) != + MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS))) + return MZ_FALSE; + + num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); + cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); + if (((num_this_disk | cdir_disk_index) != 0) && + ((num_this_disk != 1) || (cdir_disk_index != 1))) + return MZ_FALSE; + + if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) < + pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + + cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); + if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) return MZ_FALSE; + + pZip->m_central_directory_file_ofs = cdir_ofs; + + if (pZip->m_total_files) { + mz_uint i, n; + + // Read the entire central directory into a heap block, and allocate another + // heap block to hold the unsorted central dir file record offsets, and + // another to hold the sorted indices. + if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, + MZ_FALSE)) || + (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, + pZip->m_total_files, MZ_FALSE))) + return MZ_FALSE; + + if (sort_central_dir) { + if (!mz_zip_array_resize(pZip, + &pZip->m_pState->m_sorted_central_dir_offsets, + pZip->m_total_files, MZ_FALSE)) + return MZ_FALSE; + } + + if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, + pZip->m_pState->m_central_dir.m_p, + cdir_size) != cdir_size) + return MZ_FALSE; + + // Now create an index into the central directory file records, do some + // basic sanity checking on each record, and check for zip64 entries (which + // are not yet supported). + p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p; + for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) { + mz_uint total_header_size, comp_size, decomp_size, disk_index; + if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || + (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) + return MZ_FALSE; + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, + i) = + (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p); + if (sort_central_dir) + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, + mz_uint32, i) = i; + comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && + (decomp_size != comp_size)) || + (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) || + (comp_size == 0xFFFFFFFF)) + return MZ_FALSE; + disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); + if ((disk_index != num_this_disk) && (disk_index != 1)) return MZ_FALSE; + if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) + return MZ_FALSE; + if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > + n) + return MZ_FALSE; + n -= total_header_size; + p += total_header_size; + } + } + + if (sort_central_dir) + mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); + + return MZ_TRUE; +} + +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, + mz_uint32 flags) { + if ((!pZip) || (!pZip->m_pRead)) return MZ_FALSE; + if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; + pZip->m_archive_size = size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} + +static size_t mz_zip_mem_read_func(void *pOpaque, mz_uint64 file_ofs, + void *pBuf, size_t n) { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + size_t s = (file_ofs >= pZip->m_archive_size) + ? 0 + : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); + memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s); + return s; +} + +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, + size_t size, mz_uint32 flags) { + if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; + pZip->m_archive_size = size; + pZip->m_pRead = mz_zip_mem_read_func; + pZip->m_pIO_opaque = pZip; +#ifdef __cplusplus + pZip->m_pState->m_pMem = const_cast(pMem); +#else + pZip->m_pState->m_pMem = (void *)pMem; +#endif + pZip->m_pState->m_mem_size = size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, + void *pBuf, size_t n) { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + if (((mz_int64)file_ofs < 0) || + (((cur_ofs != (mz_int64)file_ofs)) && + (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); +} + +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, + mz_uint32 flags) { + mz_uint64 file_size; + MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb"); + if (!pFile) return MZ_FALSE; + if (MZ_FSEEK64(pFile, 0, SEEK_END)) { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + file_size = MZ_FTELL64(pFile); + if (!mz_zip_reader_init_internal(pZip, flags)) { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + pZip->m_pRead = mz_zip_file_read_func; + pZip->m_pIO_opaque = pZip; + pZip->m_pState->m_pFile = pFile; + pZip->m_archive_size = file_size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) { + return pZip ? pZip->m_total_files : 0; +} + +static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh( + mz_zip_archive *pZip, mz_uint file_index) { + if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) || + (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return NULL; + return &MZ_ZIP_ARRAY_ELEMENT( + &pZip->m_pState->m_central_dir, mz_uint8, + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, + file_index)); +} + +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, + mz_uint file_index) { + mz_uint m_bit_flag; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) return MZ_FALSE; + m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + return (m_bit_flag & 1); +} + +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, + mz_uint file_index) { + mz_uint filename_len, external_attr; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) return MZ_FALSE; + + // First see if the filename ends with a '/' character. + filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_len) { + if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') + return MZ_TRUE; + } + + // Bugfix: This code was also checking if the internal attribute was non-zero, + // which wasn't correct. + // Most/all zip writers (hopefully) set DOS file/directory attributes in the + // low 16-bits, so check for the DOS directory flag and ignore the source OS + // ID in the created by field. + // FIXME: Remove this check? Is it necessary - we already check the filename. + external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + if ((external_attr & 0x10) != 0) return MZ_TRUE; + + return MZ_FALSE; +} + +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, + mz_zip_archive_file_stat *pStat) { + mz_uint n; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if ((!p) || (!pStat)) return MZ_FALSE; + + // Unpack the central directory record. + pStat->m_file_index = file_index; + pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT( + &pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); + pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); + pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); + pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); +#ifndef MINIZ_NO_TIME + pStat->m_time = + mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), + MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); +#endif + pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); + pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); + pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + + // Copy as much of the filename and comment as possible. + n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); + memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); + pStat->m_filename[n] = '\0'; + + n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); + n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); + pStat->m_comment_size = n; + memcpy(pStat->m_comment, + p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), + n); + pStat->m_comment[n] = '\0'; + + return MZ_TRUE; +} + +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, + char *pFilename, mz_uint filename_buf_size) { + mz_uint n; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + if (!p) { + if (filename_buf_size) pFilename[0] = '\0'; + return 0; + } + n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_buf_size) { + n = MZ_MIN(n, filename_buf_size - 1); + memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); + pFilename[n] = '\0'; + } + return n + 1; +} + +static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA, + const char *pB, + mz_uint len, + mz_uint flags) { + mz_uint i; + if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) return 0 == memcmp(pA, pB, len); + for (i = 0; i < len; ++i) + if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) return MZ_FALSE; + return MZ_TRUE; +} + +static MZ_FORCEINLINE int mz_zip_reader_filename_compare( + const mz_zip_array *pCentral_dir_array, + const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, + mz_uint r_len) { + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( + pCentral_dir_array, mz_uint8, + MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, + l_index)), + *pE; + mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break; + pL++; + pR++; + } + return (pL == pE) ? (int)(l_len - r_len) : (l - r); +} + +static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip, + const char *pFilename) { + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT( + &pState->m_sorted_central_dir_offsets, mz_uint32, 0); + const int size = pZip->m_total_files; + const mz_uint filename_len = (mz_uint)strlen(pFilename); + int l = 0, h = size - 1; + while (l <= h) { + int m = (l + h) >> 1, file_index = pIndices[m], + comp = + mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets, + file_index, pFilename, filename_len); + if (!comp) + return file_index; + else if (comp < 0) + l = m + 1; + else + h = m - 1; + } + return -1; +} + +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, + const char *pComment, mz_uint flags) { + mz_uint file_index; + size_t name_len, comment_len; + if ((!pZip) || (!pZip->m_pState) || (!pName) || + (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return -1; + if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && + (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_size)) + return mz_zip_reader_locate_file_binary_search(pZip, pName); + name_len = strlen(pName); + if (name_len > 0xFFFF) return -1; + comment_len = pComment ? strlen(pComment) : 0; + if (comment_len > 0xFFFF) return -1; + for (file_index = 0; file_index < pZip->m_total_files; file_index++) { + const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT( + &pZip->m_pState->m_central_dir, mz_uint8, + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, + file_index)); + mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); + const char *pFilename = + (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + if (filename_len < name_len) continue; + if (comment_len) { + mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), + file_comment_len = + MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); + const char *pFile_comment = pFilename + filename_len + file_extra_len; + if ((file_comment_len != comment_len) || + (!mz_zip_reader_string_equal(pComment, pFile_comment, + file_comment_len, flags))) + continue; + } + if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) { + int ofs = filename_len - 1; + do { + if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || + (pFilename[ofs] == ':')) + break; + } while (--ofs >= 0); + ofs++; + pFilename += ofs; + filename_len -= ofs; + } + if ((filename_len == name_len) && + (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags))) + return file_index; + } + return -1; +} + +mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, + mz_uint file_index, void *pBuf, + size_t buf_size, mz_uint flags, + void *pUser_read_buf, + size_t user_read_buf_size) { + int status = TINFL_STATUS_DONE; + mz_uint64 needed_size, cur_file_ofs, comp_remaining, + out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail; + mz_zip_archive_file_stat file_stat; + void *pRead_buf; + mz_uint32 + local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / + sizeof(mz_uint32)]; + mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + tinfl_decompressor inflator; + + if ((buf_size) && (!pBuf)) return MZ_FALSE; + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; + + // Empty file, or a directory (but not always a directory - I've seen odd zips + // with directories that have compressed data which inflates to 0 bytes) + if (!file_stat.m_comp_size) return MZ_TRUE; + + // Entry is a subdirectory (I've seen old zips with dir entries which have + // compressed deflate data which inflates to 0 bytes, but these entries claim + // to uncompress to 512 bytes in the headers). + // I'm torn how to handle this case - should it fail instead? + if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE; + + // Encryption and patch files are not supported. + if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; + + // This function only supports stored and deflate. + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && + (file_stat.m_method != MZ_DEFLATED)) + return MZ_FALSE; + + // Ensure supplied output buffer is large enough. + needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size + : file_stat.m_uncomp_size; + if (buf_size < needed_size) return MZ_FALSE; + + // Read and parse the local directory entry. + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return MZ_FALSE; + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { + // The file is stored or the caller has requested the compressed data. + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, + (size_t)needed_size) != needed_size) + return MZ_FALSE; + return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) || + (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, + (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32); + } + + // Decompress the file either directly from memory or from a file input + // buffer. + tinfl_init(&inflator); + + if (pZip->m_pState->m_pMem) { + // Read directly from the archive in memory. + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } else if (pUser_read_buf) { + // Use a user provided read buffer. + if (!user_read_buf_size) return MZ_FALSE; + pRead_buf = (mz_uint8 *)pUser_read_buf; + read_buf_size = user_read_buf_size; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } else { + // Temporarily allocate a read buffer. + read_buf_size = + MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && + (read_buf_size > 0x7FFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) +#endif + return MZ_FALSE; + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, + (size_t)read_buf_size))) + return MZ_FALSE; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + do { + size_t in_buf_size, + out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, + (size_t)read_buf_avail) != read_buf_avail) { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress( + &inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, + (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | + (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0)); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + out_buf_ofs += out_buf_size; + } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); + + if (status == TINFL_STATUS_DONE) { + // Make sure the entire file was decompressed, and check its CRC. + if ((out_buf_ofs != file_stat.m_uncomp_size) || + (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, + (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32)) + status = TINFL_STATUS_FAILED; + } + + if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + + return status == TINFL_STATUS_DONE; +} + +mz_bool mz_zip_reader_extract_file_to_mem_no_alloc( + mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, + mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) { + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) return MZ_FALSE; + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, + flags, pUser_read_buf, + user_read_buf_size); +} + +mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, + void *pBuf, size_t buf_size, + mz_uint flags) { + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, + flags, NULL, 0); +} + +mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, + const char *pFilename, void *pBuf, + size_t buf_size, mz_uint flags) { + return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, + buf_size, flags, NULL, 0); +} + +void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, + size_t *pSize, mz_uint flags) { + mz_uint64 comp_size, uncomp_size, alloc_size; + const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); + void *pBuf; + + if (pSize) *pSize = 0; + if (!p) return NULL; + + comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + + alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size; +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) +#endif + return NULL; + if (NULL == + (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) + return NULL; + + if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, + flags)) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return NULL; + } + + if (pSize) *pSize = (size_t)alloc_size; + return pBuf; +} + +void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, + const char *pFilename, size_t *pSize, + mz_uint flags) { + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) { + if (pSize) *pSize = 0; + return MZ_FALSE; + } + return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); +} + +mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, + mz_uint file_index, + mz_file_write_func pCallback, + void *pOpaque, mz_uint flags) { + int status = TINFL_STATUS_DONE; + mz_uint file_crc32 = MZ_CRC32_INIT; + mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, + out_buf_ofs = 0, cur_file_ofs; + mz_zip_archive_file_stat file_stat; + void *pRead_buf = NULL; + void *pWrite_buf = NULL; + mz_uint32 + local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / + sizeof(mz_uint32)]; + mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; + + // Empty file, or a directory (but not always a directory - I've seen odd zips + // with directories that have compressed data which inflates to 0 bytes) + if (!file_stat.m_comp_size) return MZ_TRUE; + + // Entry is a subdirectory (I've seen old zips with dir entries which have + // compressed deflate data which inflates to 0 bytes, but these entries claim + // to uncompress to 512 bytes in the headers). + // I'm torn how to handle this case - should it fail instead? + if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE; + + // Encryption and patch files are not supported. + if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; + + // This function only supports stored and deflate. + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && + (file_stat.m_method != MZ_DEFLATED)) + return MZ_FALSE; + + // Read and parse the local directory entry. + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return MZ_FALSE; + + // Decompress the file either directly from memory or from a file input + // buffer. + if (pZip->m_pState->m_pMem) { + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } else { + read_buf_size = + MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, + (size_t)read_buf_size))) + return MZ_FALSE; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { + // The file is stored or the caller has requested the compressed data. + if (pZip->m_pState->m_pMem) { +#ifdef _MSC_VER + if (((0, sizeof(size_t) == sizeof(mz_uint32))) && + (file_stat.m_comp_size > 0xFFFFFFFF)) +#else + if (((sizeof(size_t) == sizeof(mz_uint32))) && + (file_stat.m_comp_size > 0xFFFFFFFF)) +#endif + return MZ_FALSE; + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, + (size_t)file_stat.m_comp_size) != file_stat.m_comp_size) + status = TINFL_STATUS_FAILED; + else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + file_crc32 = + (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, + (size_t)file_stat.m_comp_size); + cur_file_ofs += file_stat.m_comp_size; + out_buf_ofs += file_stat.m_comp_size; + comp_remaining = 0; + } else { + while (comp_remaining) { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, + (size_t)read_buf_avail) != read_buf_avail) { + status = TINFL_STATUS_FAILED; + break; + } + + if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + file_crc32 = (mz_uint32)mz_crc32( + file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail); + + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, + (size_t)read_buf_avail) != read_buf_avail) { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + out_buf_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + } + } + } else { + tinfl_decompressor inflator; + tinfl_init(&inflator); + + if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, + TINFL_LZ_DICT_SIZE))) + status = TINFL_STATUS_FAILED; + else { + do { + mz_uint8 *pWrite_buf_cur = + (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + size_t in_buf_size, + out_buf_size = + TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, + (size_t)read_buf_avail) != read_buf_avail) { + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress( + &inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, + (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, + comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + + if (out_buf_size) { + if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != + out_buf_size) { + status = TINFL_STATUS_FAILED; + break; + } + file_crc32 = + (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size); + if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) { + status = TINFL_STATUS_FAILED; + break; + } + } + } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || + (status == TINFL_STATUS_HAS_MORE_OUTPUT)); + } + } + + if ((status == TINFL_STATUS_DONE) && + (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) { + // Make sure the entire file was decompressed, and check its CRC. + if ((out_buf_ofs != file_stat.m_uncomp_size) || + (file_crc32 != file_stat.m_crc32)) + status = TINFL_STATUS_FAILED; + } + + if (!pZip->m_pState->m_pMem) pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + if (pWrite_buf) pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf); + + return status == TINFL_STATUS_DONE; +} + +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, + const char *pFilename, + mz_file_write_func pCallback, + void *pOpaque, mz_uint flags) { + int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); + if (file_index < 0) return MZ_FALSE; + return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, + flags); +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, + const void *pBuf, size_t n) { + (void)ofs; + return MZ_FWRITE(pBuf, 1, n, (MZ_FILE *)pOpaque); +} + +mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, + const char *pDst_filename, + mz_uint flags) { + mz_bool status; + mz_zip_archive_file_stat file_stat; + MZ_FILE *pFile; + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; + pFile = MZ_FOPEN(pDst_filename, "wb"); + if (!pFile) return MZ_FALSE; + status = mz_zip_reader_extract_to_callback( + pZip, file_index, mz_zip_file_write_callback, pFile, flags); + if (MZ_FCLOSE(pFile) == EOF) return MZ_FALSE; +#ifndef MINIZ_NO_TIME + if (status) + mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time); +#endif + return status; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_reader_end(mz_zip_archive *pZip) { + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || + (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return MZ_FALSE; + + if (pZip->m_pState) { + mz_zip_internal_state *pState = pZip->m_pState; + pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) { + MZ_FCLOSE(pState->m_pFile); + pState->m_pFile = NULL; + } +#endif // #ifndef MINIZ_NO_STDIO + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + } + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, + const char *pArchive_filename, + const char *pDst_filename, + mz_uint flags) { + int file_index = + mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags); + if (file_index < 0) return MZ_FALSE; + return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); +} +#endif + +// ------------------- .ZIP archive writing + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +static void mz_write_le16(mz_uint8 *p, mz_uint16 v) { + p[0] = (mz_uint8)v; + p[1] = (mz_uint8)(v >> 8); +} +static void mz_write_le32(mz_uint8 *p, mz_uint32 v) { + p[0] = (mz_uint8)v; + p[1] = (mz_uint8)(v >> 8); + p[2] = (mz_uint8)(v >> 16); + p[3] = (mz_uint8)(v >> 24); +} +#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v)) +#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v)) + +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) { + if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || + (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return MZ_FALSE; + + if (pZip->m_file_offset_alignment) { + // Ensure user specified file offset alignment is a power of 2. + if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) + return MZ_FALSE; + } + + if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; + if (!pZip->m_pFree) pZip->m_pFree = def_free_func; + if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + pZip->m_archive_size = existing_size; + pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return MZ_FALSE; + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, + sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, + sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, + sizeof(mz_uint32)); + return MZ_TRUE; +} + +static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, + const void *pBuf, size_t n) { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size); +#ifdef _MSC_VER + if ((!n) || + ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) +#else + if ((!n) || + ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) +#endif + return 0; + if (new_size > pState->m_mem_capacity) { + void *pNew_block; + size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); + while (new_capacity < new_size) new_capacity *= 2; + if (NULL == (pNew_block = pZip->m_pRealloc( + pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity))) + return 0; + pState->m_pMem = pNew_block; + pState->m_mem_capacity = new_capacity; + } + memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n); + pState->m_mem_size = (size_t)new_size; + return n; +} + +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, + size_t size_to_reserve_at_beginning, + size_t initial_allocation_size) { + pZip->m_pWrite = mz_zip_heap_write_func; + pZip->m_pIO_opaque = pZip; + if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; + if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, + size_to_reserve_at_beginning))) { + if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, initial_allocation_size))) { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + pZip->m_pState->m_mem_capacity = initial_allocation_size; + } + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, + const void *pBuf, size_t n) { + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + if (((mz_int64)file_ofs < 0) || + (((cur_ofs != (mz_int64)file_ofs)) && + (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); +} + +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, + mz_uint64 size_to_reserve_at_beginning) { + MZ_FILE *pFile; + pZip->m_pWrite = mz_zip_file_write_func; + pZip->m_pIO_opaque = pZip; + if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; + if (NULL == (pFile = MZ_FOPEN(pFilename, "wb"))) { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + pZip->m_pState->m_pFile = pFile; + if (size_to_reserve_at_beginning) { + mz_uint64 cur_ofs = 0; + char buf[4096]; + MZ_CLEAR_OBJ(buf); + do { + size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) { + mz_zip_writer_end(pZip); + return MZ_FALSE; + } + cur_ofs += n; + size_to_reserve_at_beginning -= n; + } while (size_to_reserve_at_beginning); + } + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, + const char *pFilename) { + mz_zip_internal_state *pState; + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return MZ_FALSE; + // No sense in trying to write to an archive that's already at the support max + // size + if ((pZip->m_total_files == 0xFFFF) || + ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if (pState->m_pFile) { +#ifdef MINIZ_NO_STDIO + pFilename; + return MZ_FALSE; +#else + // Archive is being read from stdio - try to reopen as writable. + if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; + if (!pFilename) return MZ_FALSE; + pZip->m_pWrite = mz_zip_file_write_func; + if (NULL == + (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) { + // The mz_zip_archive is now in a bogus state because pState->m_pFile is + // NULL, so just close it. + mz_zip_reader_end(pZip); + return MZ_FALSE; + } +#endif // #ifdef MINIZ_NO_STDIO + } else if (pState->m_pMem) { + // Archive lives in a memory block. Assume it's from the heap that we can + // resize using the realloc callback. + if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; + pState->m_mem_capacity = pState->m_mem_size; + pZip->m_pWrite = mz_zip_heap_write_func; + } + // Archive is being read via a user provided read function - make sure the + // user has specified a write function too. + else if (!pZip->m_pWrite) + return MZ_FALSE; + + // Start writing new files at the archive's current central directory + // location. + pZip->m_archive_size = pZip->m_central_directory_file_ofs; + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + pZip->m_central_directory_file_ofs = 0; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, + const void *pBuf, size_t buf_size, + mz_uint level_and_flags) { + return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, + level_and_flags, 0, 0); +} + +typedef struct { + mz_zip_archive *m_pZip; + mz_uint64 m_cur_archive_file_ofs; + mz_uint64 m_comp_size; +} mz_zip_writer_add_state; + +static mz_bool mz_zip_writer_add_put_buf_callback(const void *pBuf, int len, + void *pUser) { + mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser; + if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, + pState->m_cur_archive_file_ofs, pBuf, + len) != len) + return MZ_FALSE; + pState->m_cur_archive_file_ofs += len; + pState->m_comp_size += len; + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_create_local_dir_header( + mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, + mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, + mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, + mz_uint16 dos_time, mz_uint16 dos_date) { + (void)pZip; + memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_create_central_dir_header( + mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, + mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, + mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, + mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, + mz_uint64 local_header_ofs, mz_uint32 ext_attributes) { + (void)pZip; + memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_add_to_central_dir( + mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, + const void *pExtra, mz_uint16 extra_size, const void *pComment, + mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, + mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, + mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, + mz_uint32 ext_attributes) { + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; + size_t orig_central_dir_size = pState->m_central_dir.m_size; + mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + + // No zip64 support yet + if ((local_header_ofs > 0xFFFFFFFF) || + (((mz_uint64)pState->m_central_dir.m_size + + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + + comment_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_central_dir_header( + pZip, central_dir_header, filename_size, extra_size, comment_size, + uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, + dos_date, local_header_ofs, ext_attributes)) + return MZ_FALSE; + + if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_dir_header, + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, + filename_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, + extra_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, + comment_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, + ¢ral_dir_ofs, 1))) { + // Try to push the central directory array back into its original state. + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, + MZ_FALSE); + return MZ_FALSE; + } + + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) { + // Basic ZIP archive filename validity checks: Valid filenames cannot start + // with a forward slash, cannot contain a drive letter, and cannot use + // DOS-style backward slashes. + if (*pArchive_name == '/') return MZ_FALSE; + while (*pArchive_name) { + if ((*pArchive_name == '\\') || (*pArchive_name == ':')) return MZ_FALSE; + pArchive_name++; + } + return MZ_TRUE; +} + +static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment( + mz_zip_archive *pZip) { + mz_uint32 n; + if (!pZip->m_file_offset_alignment) return 0; + n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); + return (pZip->m_file_offset_alignment - n) & + (pZip->m_file_offset_alignment - 1); +} + +static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, + mz_uint64 cur_file_ofs, mz_uint32 n) { + char buf[4096]; + memset(buf, 0, MZ_MIN(sizeof(buf), n)); + while (n) { + mz_uint32 s = MZ_MIN(sizeof(buf), n); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) + return MZ_FALSE; + cur_file_ofs += s; + n -= s; + } + return MZ_TRUE; +} + +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, + const char *pArchive_name, const void *pBuf, + size_t buf_size, const void *pComment, + mz_uint16 comment_size, + mz_uint level_and_flags, mz_uint64 uncomp_size, + mz_uint32 uncomp_crc32) { + mz_uint16 method = 0, dos_time = 0, dos_date = 0; + mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, + cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + tdefl_compressor *pComp = NULL; + mz_bool store_data_uncompressed; + mz_zip_internal_state *pState; + + if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + store_data_uncompressed = + ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); + + if ((!pZip) || (!pZip->m_pState) || + (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || + (!pArchive_name) || ((comment_size) && (!pComment)) || + (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) + return MZ_FALSE; + // No zip64 support yet + if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; + +#ifndef MINIZ_NO_TIME + { + time_t cur_time; + time(&cur_time); + mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date); + } +#endif // #ifndef MINIZ_NO_TIME + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) return MZ_FALSE; + + num_alignment_padding_bytes = + mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || + ((pZip->m_archive_size + num_alignment_padding_bytes + + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) { + // Set DOS Subdirectory attribute bit. + ext_attributes |= 0x10; + // Subdirectories cannot contain data. + if ((buf_size) || (uncomp_size)) return MZ_FALSE; + } + + // Try to do any allocations before writing to the archive, so if an + // allocation fails the file remains unmodified. (A good idea if we're doing + // an in-place modification.) + if ((!mz_zip_array_ensure_room( + pZip, &pState->m_central_dir, + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) || + (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) + return MZ_FALSE; + + if ((!store_data_uncompressed) && (buf_size)) { + if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) + return MZ_FALSE; + } + + if (!mz_zip_writer_write_zeros( + pZip, cur_archive_file_ofs, + num_alignment_padding_bytes + sizeof(local_dir_header))) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { + MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == + 0); + } + cur_archive_file_ofs += + num_alignment_padding_bytes + sizeof(local_dir_header); + + MZ_CLEAR_OBJ(local_dir_header); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, + archive_name_size) != archive_name_size) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) { + uncomp_crc32 = + (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, buf_size); + uncomp_size = buf_size; + if (uncomp_size <= 3) { + level = 0; + store_data_uncompressed = MZ_TRUE; + } + } + + if (store_data_uncompressed) { + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, + buf_size) != buf_size) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + cur_archive_file_ofs += buf_size; + comp_size = buf_size; + + if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) method = MZ_DEFLATED; + } else if (buf_size) { + mz_zip_writer_add_state state; + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, + tdefl_create_comp_flags_from_zip_params( + level, -15, MZ_DEFAULT_STRATEGY)) != + TDEFL_STATUS_OKAY) || + (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != + TDEFL_STATUS_DONE)) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pComp = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header( + pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, + comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, + sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir( + pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, + comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, + dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, + const char *pSrc_filename, const void *pComment, + mz_uint16 comment_size, + mz_uint level_and_flags) { + mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; + mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, + cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0, + comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + MZ_FILE *pSrc_file = NULL; + + if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + + if ((!pZip) || (!pZip->m_pState) || + (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || + ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) return MZ_FALSE; + + num_alignment_padding_bytes = + mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || + ((pZip->m_archive_size + num_alignment_padding_bytes + + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date)) + return MZ_FALSE; + + pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); + if (!pSrc_file) return MZ_FALSE; + MZ_FSEEK64(pSrc_file, 0, SEEK_END); + uncomp_size = MZ_FTELL64(pSrc_file); + MZ_FSEEK64(pSrc_file, 0, SEEK_SET); + + if (uncomp_size > 0xFFFFFFFF) { + // No zip64 support yet + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + if (uncomp_size <= 3) level = 0; + + if (!mz_zip_writer_write_zeros( + pZip, cur_archive_file_ofs, + num_alignment_padding_bytes + sizeof(local_dir_header))) { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { + MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == + 0); + } + cur_archive_file_ofs += + num_alignment_padding_bytes + sizeof(local_dir_header); + + MZ_CLEAR_OBJ(local_dir_header); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, + archive_name_size) != archive_name_size) { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (uncomp_size) { + mz_uint64 uncomp_remaining = uncomp_size; + void *pRead_buf = + pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); + if (!pRead_buf) { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + if (!level) { + while (uncomp_remaining) { + mz_uint n = + (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); + if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) || + (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, + n) != n)) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + uncomp_crc32 = + (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n); + uncomp_remaining -= n; + cur_archive_file_ofs += n; + } + comp_size = uncomp_size; + } else { + mz_bool result = MZ_FALSE; + mz_zip_writer_add_state state; + tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, + tdefl_create_comp_flags_from_zip_params( + level, -15, MZ_DEFAULT_STRATEGY)) != + TDEFL_STATUS_OKAY) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + for (;;) { + size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, + (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); + tdefl_status status; + + if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size) + break; + + uncomp_crc32 = (mz_uint32)mz_crc32( + uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size); + uncomp_remaining -= in_buf_size; + + status = tdefl_compress_buffer( + pComp, pRead_buf, in_buf_size, + uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH); + if (status == TDEFL_STATUS_DONE) { + result = MZ_TRUE; + break; + } else if (status != TDEFL_STATUS_OKAY) + break; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + + if (!result) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + } + + MZ_FCLOSE(pSrc_file); + pSrc_file = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header( + pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, + comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, + sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir( + pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, + comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, + dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, + mz_zip_archive *pSource_zip, + mz_uint file_index) { + mz_uint n, bit_flags, num_alignment_padding_bytes; + mz_uint64 comp_bytes_remaining, local_dir_header_ofs; + mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; + mz_uint32 + local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / + sizeof(mz_uint32)]; + mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + size_t orig_central_dir_size; + mz_zip_internal_state *pState; + void *pBuf; + const mz_uint8 *pSrc_central_header; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + if (NULL == + (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index))) + return MZ_FALSE; + pState = pZip->m_pState; + + num_alignment_padding_bytes = + mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || + ((pZip->m_archive_size + num_alignment_padding_bytes + + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) > + 0xFFFFFFFF)) + return MZ_FALSE; + + cur_src_file_ofs = + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + cur_dst_file_ofs = pZip->m_archive_size; + + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, + pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, + num_alignment_padding_bytes)) + return MZ_FALSE; + cur_dst_file_ofs += num_alignment_padding_bytes; + local_dir_header_ofs = cur_dst_file_ofs; + if (pZip->m_file_offset_alignment) { + MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == + 0); + } + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + comp_bytes_remaining = + n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + + if (NULL == (pBuf = pZip->m_pAlloc( + pZip->m_pAlloc_opaque, 1, + (size_t)MZ_MAX(sizeof(mz_uint32) * 4, + MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, + comp_bytes_remaining))))) + return MZ_FALSE; + + while (comp_bytes_remaining) { + n = (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining); + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, + n) != n) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_src_file_ofs += n; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_dst_file_ofs += n; + + comp_bytes_remaining -= n; + } + + bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); + if (bit_flags & 8) { + // Copy data descriptor + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, + sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 4 : 3); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + cur_src_file_ofs += n; + cur_dst_file_ofs += n; + } + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + + // no zip64 support yet + if (cur_dst_file_ofs > 0xFFFFFFFF) return MZ_FALSE; + + orig_central_dir_size = pState->m_central_dir.m_size; + + memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, + local_dir_header_ofs); + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header, + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) + return MZ_FALSE; + + n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) + + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); + if (!mz_zip_array_push_back( + pZip, &pState->m_central_dir, + pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, + MZ_FALSE); + return MZ_FALSE; + } + + if (pState->m_central_dir.m_size > 0xFFFFFFFF) return MZ_FALSE; + n = (mz_uint32)orig_central_dir_size; + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, + MZ_FALSE); + return MZ_FALSE; + } + + pZip->m_total_files++; + pZip->m_archive_size = cur_dst_file_ofs; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) { + mz_zip_internal_state *pState; + mz_uint64 central_dir_ofs, central_dir_size; + mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE]; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + + pState = pZip->m_pState; + + // no zip64 support yet + if ((pZip->m_total_files > 0xFFFF) || + ((pZip->m_archive_size + pState->m_central_dir.m_size + + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + central_dir_ofs = 0; + central_dir_size = 0; + if (pZip->m_total_files) { + // Write central directory + central_dir_ofs = pZip->m_archive_size; + central_dir_size = pState->m_central_dir.m_size; + pZip->m_central_directory_file_ofs = central_dir_ofs; + if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, + pState->m_central_dir.m_p, + (size_t)central_dir_size) != central_dir_size) + return MZ_FALSE; + pZip->m_archive_size += central_dir_size; + } + + // Write end of central directory record + MZ_CLEAR_OBJ(hdr); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, + pZip->m_total_files); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, + sizeof(hdr)) != sizeof(hdr)) + return MZ_FALSE; +#ifndef MINIZ_NO_STDIO + if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) return MZ_FALSE; +#endif // #ifndef MINIZ_NO_STDIO + + pZip->m_archive_size += sizeof(hdr); + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, + size_t *pSize) { + if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) return MZ_FALSE; + if (pZip->m_pWrite != mz_zip_heap_write_func) return MZ_FALSE; + if (!mz_zip_writer_finalize_archive(pZip)) return MZ_FALSE; + + *pBuf = pZip->m_pState->m_pMem; + *pSize = pZip->m_pState->m_mem_size; + pZip->m_pState->m_pMem = NULL; + pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0; + return MZ_TRUE; +} + +mz_bool mz_zip_writer_end(mz_zip_archive *pZip) { + mz_zip_internal_state *pState; + mz_bool status = MZ_TRUE; + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || + ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && + (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) + return MZ_FALSE; + + pState = pZip->m_pState; + pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) { + MZ_FCLOSE(pState->m_pFile); + pState->m_pFile = NULL; + } +#endif // #ifndef MINIZ_NO_STDIO + + if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) { + pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); + pState->m_pMem = NULL; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + return status; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_add_mem_to_archive_file_in_place( + const char *pZip_filename, const char *pArchive_name, const void *pBuf, + size_t buf_size, const void *pComment, mz_uint16 comment_size, + mz_uint level_and_flags) { + mz_bool status, created_new_archive = MZ_FALSE; + mz_zip_archive zip_archive; + struct MZ_FILE_STAT_STRUCT file_stat; + MZ_CLEAR_OBJ(zip_archive); + if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; + if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || + ((comment_size) && (!pComment)) || + ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; + if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) { + // Create a new archive. + if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0)) + return MZ_FALSE; + created_new_archive = MZ_TRUE; + } else { + // Append to an existing archive. + if (!mz_zip_reader_init_file( + &zip_archive, pZip_filename, + level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return MZ_FALSE; + if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) { + mz_zip_reader_end(&zip_archive); + return MZ_FALSE; + } + } + status = + mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, + pComment, comment_size, level_and_flags, 0, 0); + // Always finalize, even if adding failed for some reason, so we have a valid + // central directory. (This may not always succeed, but we can try.) + if (!mz_zip_writer_finalize_archive(&zip_archive)) status = MZ_FALSE; + if (!mz_zip_writer_end(&zip_archive)) status = MZ_FALSE; + if ((!status) && (created_new_archive)) { + // It's a new archive and something went wrong, so just delete it. + int ignoredStatus = MZ_DELETE_FILE(pZip_filename); + (void)ignoredStatus; + } + return status; +} + +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, + const char *pArchive_name, + size_t *pSize, mz_uint flags) { + int file_index; + mz_zip_archive zip_archive; + void *p = NULL; + + if (pSize) *pSize = 0; + + if ((!pZip_filename) || (!pArchive_name)) return NULL; + + MZ_CLEAR_OBJ(zip_archive); + if (!mz_zip_reader_init_file( + &zip_archive, pZip_filename, + flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return NULL; + + if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, NULL, + flags)) >= 0) + p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); + + mz_zip_reader_end(&zip_archive); + return p; +} + +#endif // #ifndef MINIZ_NO_STDIO + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + +#ifdef __cplusplus +} +#endif + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif // MINIZ_HEADER_FILE_ONLY + +/* + This is free and unencumbered software released into the public domain. + + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to +*/ + +// ---------------------- end of miniz ---------------------------------------- + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +} // namespace miniz +#else + +// Reuse MINIZ_LITTE_ENDIAN macro + +#if defined(__sparcv9) +// Big endian +#else +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. +#define MINIZ_LITTLE_ENDIAN 1 +#endif +#endif + +#endif // TINYEXR_USE_MINIZ + +// static bool IsBigEndian(void) { +// union { +// unsigned int i; +// char c[4]; +// } bint = {0x01020304}; +// +// return bint.c[0] == 1; +//} + +static void SetErrorMessage(const std::string &msg, const char **err) { + if (err) { +#ifdef _WIN32 + (*err) = _strdup(msg.c_str()); +#else + (*err) = strdup(msg.c_str()); +#endif + } +} + +static const int kEXRVersionSize = 8; + +static void cpy2(unsigned short *dst_val, const unsigned short *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; +} + +static void swap2(unsigned short *val) { +#ifdef MINIZ_LITTLE_ENDIAN + (void)val; +#else + unsigned short tmp = *val; + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[1]; + dst[1] = src[0]; +#endif +} + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-function" +#endif + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif +static void cpy4(int *dst_val, const int *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +static void cpy4(unsigned int *dst_val, const unsigned int *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +static void cpy4(float *dst_val, const float *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +static void swap4(unsigned int *val) { +#ifdef MINIZ_LITTLE_ENDIAN + (void)val; +#else + unsigned int tmp = *val; + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; +#endif +} + +#if 0 +static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) { + unsigned char *dst = reinterpret_cast(dst_val); + const unsigned char *src = reinterpret_cast(src_val); + + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + dst[4] = src[4]; + dst[5] = src[5]; + dst[6] = src[6]; + dst[7] = src[7]; +} +#endif + +static void swap8(tinyexr::tinyexr_uint64 *val) { +#ifdef MINIZ_LITTLE_ENDIAN + (void)val; +#else + tinyexr::tinyexr_uint64 tmp = (*val); + unsigned char *dst = reinterpret_cast(val); + unsigned char *src = reinterpret_cast(&tmp); + + dst[0] = src[7]; + dst[1] = src[6]; + dst[2] = src[5]; + dst[3] = src[4]; + dst[4] = src[3]; + dst[5] = src[2]; + dst[6] = src[1]; + dst[7] = src[0]; +#endif +} + +// https://gist.github.com/rygorous/2156668 +// Reuse MINIZ_LITTLE_ENDIAN flag from miniz. +union FP32 { + unsigned int u; + float f; + struct { +#if MINIZ_LITTLE_ENDIAN + unsigned int Mantissa : 23; + unsigned int Exponent : 8; + unsigned int Sign : 1; +#else + unsigned int Sign : 1; + unsigned int Exponent : 8; + unsigned int Mantissa : 23; +#endif + } s; +}; + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpadded" +#endif + +union FP16 { + unsigned short u; + struct { +#if MINIZ_LITTLE_ENDIAN + unsigned int Mantissa : 10; + unsigned int Exponent : 5; + unsigned int Sign : 1; +#else + unsigned int Sign : 1; + unsigned int Exponent : 5; + unsigned int Mantissa : 10; +#endif + } s; +}; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +static FP32 half_to_float(FP16 h) { + static const FP32 magic = {113 << 23}; + static const unsigned int shifted_exp = 0x7c00 + << 13; // exponent mask after shift + FP32 o; + + o.u = (h.u & 0x7fffU) << 13U; // exponent/mantissa bits + unsigned int exp_ = shifted_exp & o.u; // just the exponent + o.u += (127 - 15) << 23; // exponent adjust + + // handle exponent special cases + if (exp_ == shifted_exp) // Inf/NaN? + o.u += (128 - 16) << 23; // extra exp adjust + else if (exp_ == 0) // Zero/Denormal? + { + o.u += 1 << 23; // extra exp adjust + o.f -= magic.f; // renormalize + } + + o.u |= (h.u & 0x8000U) << 16U; // sign bit + return o; +} + +static FP16 float_to_half_full(FP32 f) { + FP16 o = {0}; + + // Based on ISPC reference code (with minor modifications) + if (f.s.Exponent == 0) // Signed zero/denormal (which will underflow) + o.s.Exponent = 0; + else if (f.s.Exponent == 255) // Inf or NaN (all exponent bits set) + { + o.s.Exponent = 31; + o.s.Mantissa = f.s.Mantissa ? 0x200 : 0; // NaN->qNaN and Inf->Inf + } else // Normalized number + { + // Exponent unbias the single, then bias the halfp + int newexp = f.s.Exponent - 127 + 15; + if (newexp >= 31) // Overflow, return signed infinity + o.s.Exponent = 31; + else if (newexp <= 0) // Underflow + { + if ((14 - newexp) <= 24) // Mantissa might be non-zero + { + unsigned int mant = f.s.Mantissa | 0x800000; // Hidden 1 bit + o.s.Mantissa = mant >> (14 - newexp); + if ((mant >> (13 - newexp)) & 1) // Check for rounding + o.u++; // Round, might overflow into exp bit, but this is OK + } + } else { + o.s.Exponent = static_cast(newexp); + o.s.Mantissa = f.s.Mantissa >> 13; + if (f.s.Mantissa & 0x1000) // Check for rounding + o.u++; // Round, might overflow to inf, this is OK + } + } + + o.s.Sign = f.s.Sign; + return o; +} + +// NOTE: From OpenEXR code +// #define IMF_INCREASING_Y 0 +// #define IMF_DECREASING_Y 1 +// #define IMF_RAMDOM_Y 2 +// +// #define IMF_NO_COMPRESSION 0 +// #define IMF_RLE_COMPRESSION 1 +// #define IMF_ZIPS_COMPRESSION 2 +// #define IMF_ZIP_COMPRESSION 3 +// #define IMF_PIZ_COMPRESSION 4 +// #define IMF_PXR24_COMPRESSION 5 +// #define IMF_B44_COMPRESSION 6 +// #define IMF_B44A_COMPRESSION 7 + +#ifdef __clang__ +#pragma clang diagnostic push + +#if __has_warning("-Wzero-as-null-pointer-constant") +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif + +#endif + +static const char *ReadString(std::string *s, const char *ptr, size_t len) { + // Read untile NULL(\0). + const char *p = ptr; + const char *q = ptr; + while ((size_t(q - ptr) < len) && (*q) != 0) { + q++; + } + + if (size_t(q - ptr) >= len) { + (*s) = std::string(); + return NULL; + } + + (*s) = std::string(p, q); + + return q + 1; // skip '\0' +} + +static bool ReadAttribute(std::string *name, std::string *type, + std::vector *data, size_t *marker_size, + const char *marker, size_t size) { + size_t name_len = strnlen(marker, size); + if (name_len == size) { + // String does not have a terminating character. + return false; + } + *name = std::string(marker, name_len); + + marker += name_len + 1; + size -= name_len + 1; + + size_t type_len = strnlen(marker, size); + if (type_len == size) { + return false; + } + *type = std::string(marker, type_len); + + marker += type_len + 1; + size -= type_len + 1; + + if (size < sizeof(uint32_t)) { + return false; + } + + uint32_t data_len; + memcpy(&data_len, marker, sizeof(uint32_t)); + tinyexr::swap4(reinterpret_cast(&data_len)); + + if (data_len == 0) { + if ((*type).compare("string") == 0) { + // Accept empty string attribute. + + marker += sizeof(uint32_t); + size -= sizeof(uint32_t); + + *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t); + + data->resize(1); + (*data)[0] = '\0'; + + return true; + } else { + return false; + } + } + + marker += sizeof(uint32_t); + size -= sizeof(uint32_t); + + if (size < data_len) { + return false; + } + + data->resize(static_cast(data_len)); + memcpy(&data->at(0), marker, static_cast(data_len)); + + *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t) + data_len; + return true; +} + +static void WriteAttributeToMemory(std::vector *out, + const char *name, const char *type, + const unsigned char *data, int len) { + out->insert(out->end(), name, name + strlen(name) + 1); + out->insert(out->end(), type, type + strlen(type) + 1); + + int outLen = len; + tinyexr::swap4(reinterpret_cast(&outLen)); + out->insert(out->end(), reinterpret_cast(&outLen), + reinterpret_cast(&outLen) + sizeof(int)); + out->insert(out->end(), data, data + len); +} + +typedef struct { + std::string name; // less than 255 bytes long + int pixel_type; + int x_sampling; + int y_sampling; + unsigned char p_linear; + unsigned char pad[3]; +} ChannelInfo; + +typedef struct test1{ + std::vector channels; + std::vector attributes; + + int data_window[4]; + int line_order; + int display_window[4]; + float screen_window_center[2]; + float screen_window_width; + float pixel_aspect_ratio; + + int chunk_count; + + // Tiled format + int tile_size_x; + int tile_size_y; + int tile_level_mode; + int tile_rounding_mode; + + unsigned int header_len; + + int compression_type; + + void clear() { + channels.clear(); + attributes.clear(); + + data_window[0] = 0; + data_window[1] = 0; + data_window[2] = 0; + data_window[3] = 0; + line_order = 0; + display_window[0] = 0; + display_window[1] = 0; + display_window[2] = 0; + display_window[3] = 0; + screen_window_center[0] = 0.0f; + screen_window_center[1] = 0.0f; + screen_window_width = 0.0f; + pixel_aspect_ratio = 0.0f; + + chunk_count = 0; + + // Tiled format + tile_size_x = 0; + tile_size_y = 0; + tile_level_mode = 0; + tile_rounding_mode = 0; + + header_len = 0; + compression_type = 0; + } +} HeaderInfo; + +static bool ReadChannelInfo(std::vector &channels, + const std::vector &data) { + const char *p = reinterpret_cast(&data.at(0)); + + for (;;) { + if ((*p) == 0) { + break; + } + ChannelInfo info; + + tinyexr_int64 data_len = static_cast(data.size()) - + (p - reinterpret_cast(data.data())); + if (data_len < 0) { + return false; + } + + p = ReadString(&info.name, p, size_t(data_len)); + if ((p == NULL) && (info.name.empty())) { + // Buffer overrun. Issue #51. + return false; + } + + const unsigned char *data_end = + reinterpret_cast(p) + 16; + if (data_end >= (data.data() + data.size())) { + return false; + } + + memcpy(&info.pixel_type, p, sizeof(int)); + p += 4; + info.p_linear = static_cast(p[0]); // uchar + p += 1 + 3; // reserved: uchar[3] + memcpy(&info.x_sampling, p, sizeof(int)); // int + p += 4; + memcpy(&info.y_sampling, p, sizeof(int)); // int + p += 4; + + tinyexr::swap4(reinterpret_cast(&info.pixel_type)); + tinyexr::swap4(reinterpret_cast(&info.x_sampling)); + tinyexr::swap4(reinterpret_cast(&info.y_sampling)); + + channels.push_back(info); + } + + return true; +} + +static void WriteChannelInfo(std::vector &data, + const std::vector &channels) { + size_t sz = 0; + + // Calculate total size. + for (size_t c = 0; c < channels.size(); c++) { + sz += strlen(channels[c].name.c_str()) + 1; // +1 for \0 + sz += 16; // 4 * int + } + data.resize(sz + 1); + + unsigned char *p = &data.at(0); + + for (size_t c = 0; c < channels.size(); c++) { + memcpy(p, channels[c].name.c_str(), strlen(channels[c].name.c_str())); + p += strlen(channels[c].name.c_str()); + (*p) = '\0'; + p++; + + int pixel_type = channels[c].pixel_type; + int x_sampling = channels[c].x_sampling; + int y_sampling = channels[c].y_sampling; + tinyexr::swap4(reinterpret_cast(&pixel_type)); + tinyexr::swap4(reinterpret_cast(&x_sampling)); + tinyexr::swap4(reinterpret_cast(&y_sampling)); + + memcpy(p, &pixel_type, sizeof(int)); + p += sizeof(int); + + (*p) = channels[c].p_linear; + p += 4; + + memcpy(p, &x_sampling, sizeof(int)); + p += sizeof(int); + + memcpy(p, &y_sampling, sizeof(int)); + p += sizeof(int); + } + + (*p) = '\0'; +} + +static void CompressZip(unsigned char *dst, + tinyexr::tinyexr_uint64 &compressedSize, + const unsigned char *src, unsigned long src_size) { + std::vector tmpBuf(src_size); + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfZipCompressor.cpp + // + + // + // Reorder the pixel data. + // + + const char *srcPtr = reinterpret_cast(src); + + { + char *t1 = reinterpret_cast(&tmpBuf.at(0)); + char *t2 = reinterpret_cast(&tmpBuf.at(0)) + (src_size + 1) / 2; + const char *stop = srcPtr + src_size; + + for (;;) { + if (srcPtr < stop) + *(t1++) = *(srcPtr++); + else + break; + + if (srcPtr < stop) + *(t2++) = *(srcPtr++); + else + break; + } + } + + // + // Predictor. + // + + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + src_size; + int p = t[-1]; + + while (t < stop) { + int d = int(t[0]) - p + (128 + 256); + p = t[0]; + t[0] = static_cast(d); + ++t; + } + } + +#if TINYEXR_USE_MINIZ + // + // Compress the data using miniz + // + + miniz::mz_ulong outSize = miniz::mz_compressBound(src_size); + int ret = miniz::mz_compress( + dst, &outSize, static_cast(&tmpBuf.at(0)), + src_size); + assert(ret == miniz::MZ_OK); + (void)ret; + + compressedSize = outSize; +#else + uLong outSize = compressBound(static_cast(src_size)); + int ret = compress(dst, &outSize, static_cast(&tmpBuf.at(0)), + src_size); + assert(ret == Z_OK); + + compressedSize = outSize; +#endif + + // Use uncompressed data when compressed data is larger than uncompressed. + // (Issue 40) + if (compressedSize >= src_size) { + compressedSize = src_size; + memcpy(dst, src, src_size); + } +} + +static bool DecompressZip(unsigned char *dst, + unsigned long *uncompressed_size /* inout */, + const unsigned char *src, unsigned long src_size) { + if ((*uncompressed_size) == src_size) { + // Data is not compressed(Issue 40). + memcpy(dst, src, src_size); + return true; + } + std::vector tmpBuf(*uncompressed_size); + +#if TINYEXR_USE_MINIZ + int ret = + miniz::mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); + if (miniz::MZ_OK != ret) { + return false; + } +#else + int ret = uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); + if (Z_OK != ret) { + return false; + } +#endif + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfZipCompressor.cpp + // + + // Predictor. + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + (*uncompressed_size); + + while (t < stop) { + int d = int(t[-1]) + int(t[0]) - 128; + t[0] = static_cast(d); + ++t; + } + } + + // Reorder the pixel data. + { + const char *t1 = reinterpret_cast(&tmpBuf.at(0)); + const char *t2 = reinterpret_cast(&tmpBuf.at(0)) + + (*uncompressed_size + 1) / 2; + char *s = reinterpret_cast(dst); + char *stop = s + (*uncompressed_size); + + for (;;) { + if (s < stop) + *(s++) = *(t1++); + else + break; + + if (s < stop) + *(s++) = *(t2++); + else + break; + } + } + + return true; +} + +// RLE code from OpenEXR -------------------------------------- + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wsign-conversion" +#if __has_warning("-Wextra-semi-stmt") +#pragma clang diagnostic ignored "-Wextra-semi-stmt" +#endif +#endif + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4204) // nonstandard extension used : non-constant + // aggregate initializer (also supported by GNU + // C and C99, so no big deal) +#pragma warning(disable : 4244) // 'initializing': conversion from '__int64' to + // 'int', possible loss of data +#pragma warning(disable : 4267) // 'argument': conversion from '__int64' to + // 'int', possible loss of data +#pragma warning(disable : 4996) // 'strdup': The POSIX name for this item is + // deprecated. Instead, use the ISO C and C++ + // conformant name: _strdup. +#endif + +const int MIN_RUN_LENGTH = 3; +const int MAX_RUN_LENGTH = 127; + +// +// Compress an array of bytes, using run-length encoding, +// and return the length of the compressed data. +// + +static int rleCompress(int inLength, const char in[], signed char out[]) { + const char *inEnd = in + inLength; + const char *runStart = in; + const char *runEnd = in + 1; + signed char *outWrite = out; + + while (runStart < inEnd) { + while (runEnd < inEnd && *runStart == *runEnd && + runEnd - runStart - 1 < MAX_RUN_LENGTH) { + ++runEnd; + } + + if (runEnd - runStart >= MIN_RUN_LENGTH) { + // + // Compressable run + // + + *outWrite++ = static_cast(runEnd - runStart) - 1; + *outWrite++ = *(reinterpret_cast(runStart)); + runStart = runEnd; + } else { + // + // Uncompressable run + // + + while (runEnd < inEnd && + ((runEnd + 1 >= inEnd || *runEnd != *(runEnd + 1)) || + (runEnd + 2 >= inEnd || *(runEnd + 1) != *(runEnd + 2))) && + runEnd - runStart < MAX_RUN_LENGTH) { + ++runEnd; + } + + *outWrite++ = static_cast(runStart - runEnd); + + while (runStart < runEnd) { + *outWrite++ = *(reinterpret_cast(runStart++)); + } + } + + ++runEnd; + } + + return static_cast(outWrite - out); +} + +// +// Uncompress an array of bytes compressed with rleCompress(). +// Returns the length of the oncompressed data, or 0 if the +// length of the uncompressed data would be more than maxLength. +// + +static int rleUncompress(int inLength, int maxLength, const signed char in[], + char out[]) { + char *outStart = out; + + while (inLength > 0) { + if (*in < 0) { + int count = -(static_cast(*in++)); + inLength -= count + 1; + + // Fixes #116: Add bounds check to in buffer. + if ((0 > (maxLength -= count)) || (inLength < 0)) return 0; + + memcpy(out, in, count); + out += count; + in += count; + } else { + int count = *in++; + inLength -= 2; + + if (0 > (maxLength -= count + 1)) return 0; + + memset(out, *reinterpret_cast(in), count + 1); + out += count + 1; + + in++; + } + } + + return static_cast(out - outStart); +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +// End of RLE code from OpenEXR ----------------------------------- + +static void CompressRle(unsigned char *dst, + tinyexr::tinyexr_uint64 &compressedSize, + const unsigned char *src, unsigned long src_size) { + std::vector tmpBuf(src_size); + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfRleCompressor.cpp + // + + // + // Reorder the pixel data. + // + + const char *srcPtr = reinterpret_cast(src); + + { + char *t1 = reinterpret_cast(&tmpBuf.at(0)); + char *t2 = reinterpret_cast(&tmpBuf.at(0)) + (src_size + 1) / 2; + const char *stop = srcPtr + src_size; + + for (;;) { + if (srcPtr < stop) + *(t1++) = *(srcPtr++); + else + break; + + if (srcPtr < stop) + *(t2++) = *(srcPtr++); + else + break; + } + } + + // + // Predictor. + // + + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + src_size; + int p = t[-1]; + + while (t < stop) { + int d = int(t[0]) - p + (128 + 256); + p = t[0]; + t[0] = static_cast(d); + ++t; + } + } + + // outSize will be (srcSiz * 3) / 2 at max. + int outSize = rleCompress(static_cast(src_size), + reinterpret_cast(&tmpBuf.at(0)), + reinterpret_cast(dst)); + assert(outSize > 0); + + compressedSize = static_cast(outSize); + + // Use uncompressed data when compressed data is larger than uncompressed. + // (Issue 40) + if (compressedSize >= src_size) { + compressedSize = src_size; + memcpy(dst, src, src_size); + } +} + +static bool DecompressRle(unsigned char *dst, + const unsigned long uncompressed_size, + const unsigned char *src, unsigned long src_size) { + if (uncompressed_size == src_size) { + // Data is not compressed(Issue 40). + memcpy(dst, src, src_size); + return true; + } + + // Workaround for issue #112. + // TODO(syoyo): Add more robust out-of-bounds check in `rleUncompress`. + if (src_size <= 2) { + return false; + } + + std::vector tmpBuf(uncompressed_size); + + int ret = rleUncompress(static_cast(src_size), + static_cast(uncompressed_size), + reinterpret_cast(src), + reinterpret_cast(&tmpBuf.at(0))); + if (ret != static_cast(uncompressed_size)) { + return false; + } + + // + // Apply EXR-specific? postprocess. Grabbed from OpenEXR's + // ImfRleCompressor.cpp + // + + // Predictor. + { + unsigned char *t = &tmpBuf.at(0) + 1; + unsigned char *stop = &tmpBuf.at(0) + uncompressed_size; + + while (t < stop) { + int d = int(t[-1]) + int(t[0]) - 128; + t[0] = static_cast(d); + ++t; + } + } + + // Reorder the pixel data. + { + const char *t1 = reinterpret_cast(&tmpBuf.at(0)); + const char *t2 = reinterpret_cast(&tmpBuf.at(0)) + + (uncompressed_size + 1) / 2; + char *s = reinterpret_cast(dst); + char *stop = s + uncompressed_size; + + for (;;) { + if (s < stop) + *(s++) = *(t1++); + else + break; + + if (s < stop) + *(s++) = *(t2++); + else + break; + } + } + + return true; +} + +#if TINYEXR_USE_PIZ + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++11-long-long" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wpadded" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wc++11-extensions" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" + +#if __has_warning("-Wcast-qual") +#pragma clang diagnostic ignored "-Wcast-qual" +#endif + +#if __has_warning("-Wextra-semi-stmt") +#pragma clang diagnostic ignored "-Wextra-semi-stmt" +#endif + +#endif + +// +// PIZ compress/uncompress, based on OpenEXR's ImfPizCompressor.cpp +// +// ----------------------------------------------------------------- +// Copyright (c) 2004, Industrial Light & Magic, a division of Lucas +// Digital Ltd. LLC) +// (3 clause BSD license) +// + +struct PIZChannelData { + unsigned short *start; + unsigned short *end; + int nx; + int ny; + int ys; + int size; +}; + +//----------------------------------------------------------------------------- +// +// 16-bit Haar Wavelet encoding and decoding +// +// The source code in this file is derived from the encoding +// and decoding routines written by Christian Rouet for his +// PIZ image file format. +// +//----------------------------------------------------------------------------- + +// +// Wavelet basis functions without modulo arithmetic; they produce +// the best compression ratios when the wavelet-transformed data are +// Huffman-encoded, but the wavelet transform works only for 14-bit +// data (untransformed data values must be less than (1 << 14)). +// + +inline void wenc14(unsigned short a, unsigned short b, unsigned short &l, + unsigned short &h) { + short as = static_cast(a); + short bs = static_cast(b); + + short ms = (as + bs) >> 1; + short ds = as - bs; + + l = static_cast(ms); + h = static_cast(ds); +} + +inline void wdec14(unsigned short l, unsigned short h, unsigned short &a, + unsigned short &b) { + short ls = static_cast(l); + short hs = static_cast(h); + + int hi = hs; + int ai = ls + (hi & 1) + (hi >> 1); + + short as = static_cast(ai); + short bs = static_cast(ai - hi); + + a = static_cast(as); + b = static_cast(bs); +} + +// +// Wavelet basis functions with modulo arithmetic; they work with full +// 16-bit data, but Huffman-encoding the wavelet-transformed data doesn't +// compress the data quite as well. +// + +const int NBITS = 16; +const int A_OFFSET = 1 << (NBITS - 1); +const int M_OFFSET = 1 << (NBITS - 1); +const int MOD_MASK = (1 << NBITS) - 1; + +inline void wenc16(unsigned short a, unsigned short b, unsigned short &l, + unsigned short &h) { + int ao = (a + A_OFFSET) & MOD_MASK; + int m = ((ao + b) >> 1); + int d = ao - b; + + if (d < 0) m = (m + M_OFFSET) & MOD_MASK; + + d &= MOD_MASK; + + l = static_cast(m); + h = static_cast(d); +} + +inline void wdec16(unsigned short l, unsigned short h, unsigned short &a, + unsigned short &b) { + int m = l; + int d = h; + int bb = (m - (d >> 1)) & MOD_MASK; + int aa = (d + bb - A_OFFSET) & MOD_MASK; + b = static_cast(bb); + a = static_cast(aa); +} + +// +// 2D Wavelet encoding: +// + +static void wav2Encode( + unsigned short *in, // io: values are transformed in place + int nx, // i : x size + int ox, // i : x offset + int ny, // i : y size + int oy, // i : y offset + unsigned short mx) // i : maximum in[x][y] value +{ + bool w14 = (mx < (1 << 14)); + int n = (nx > ny) ? ny : nx; + int p = 1; // == 1 << level + int p2 = 2; // == 1 << (level+1) + + // + // Hierachical loop on smaller dimension n + // + + while (p2 <= n) { + unsigned short *py = in; + unsigned short *ey = in + oy * (ny - p2); + int oy1 = oy * p; + int oy2 = oy * p2; + int ox1 = ox * p; + int ox2 = ox * p2; + unsigned short i00, i01, i10, i11; + + // + // Y loop + // + + for (; py <= ey; py += oy2) { + unsigned short *px = py; + unsigned short *ex = py + ox * (nx - p2); + + // + // X loop + // + + for (; px <= ex; px += ox2) { + unsigned short *p01 = px + ox1; + unsigned short *p10 = px + oy1; + unsigned short *p11 = p10 + ox1; + + // + // 2D wavelet encoding + // + + if (w14) { + wenc14(*px, *p01, i00, i01); + wenc14(*p10, *p11, i10, i11); + wenc14(i00, i10, *px, *p10); + wenc14(i01, i11, *p01, *p11); + } else { + wenc16(*px, *p01, i00, i01); + wenc16(*p10, *p11, i10, i11); + wenc16(i00, i10, *px, *p10); + wenc16(i01, i11, *p01, *p11); + } + } + + // + // Encode (1D) odd column (still in Y loop) + // + + if (nx & p) { + unsigned short *p10 = px + oy1; + + if (w14) + wenc14(*px, *p10, i00, *p10); + else + wenc16(*px, *p10, i00, *p10); + + *px = i00; + } + } + + // + // Encode (1D) odd line (must loop in X) + // + + if (ny & p) { + unsigned short *px = py; + unsigned short *ex = py + ox * (nx - p2); + + for (; px <= ex; px += ox2) { + unsigned short *p01 = px + ox1; + + if (w14) + wenc14(*px, *p01, i00, *p01); + else + wenc16(*px, *p01, i00, *p01); + + *px = i00; + } + } + + // + // Next level + // + + p = p2; + p2 <<= 1; + } +} + +// +// 2D Wavelet decoding: +// + +static void wav2Decode( + unsigned short *in, // io: values are transformed in place + int nx, // i : x size + int ox, // i : x offset + int ny, // i : y size + int oy, // i : y offset + unsigned short mx) // i : maximum in[x][y] value +{ + bool w14 = (mx < (1 << 14)); + int n = (nx > ny) ? ny : nx; + int p = 1; + int p2; + + // + // Search max level + // + + while (p <= n) p <<= 1; + + p >>= 1; + p2 = p; + p >>= 1; + + // + // Hierarchical loop on smaller dimension n + // + + while (p >= 1) { + unsigned short *py = in; + unsigned short *ey = in + oy * (ny - p2); + int oy1 = oy * p; + int oy2 = oy * p2; + int ox1 = ox * p; + int ox2 = ox * p2; + unsigned short i00, i01, i10, i11; + + // + // Y loop + // + + for (; py <= ey; py += oy2) { + unsigned short *px = py; + unsigned short *ex = py + ox * (nx - p2); + + // + // X loop + // + + for (; px <= ex; px += ox2) { + unsigned short *p01 = px + ox1; + unsigned short *p10 = px + oy1; + unsigned short *p11 = p10 + ox1; + + // + // 2D wavelet decoding + // + + if (w14) { + wdec14(*px, *p10, i00, i10); + wdec14(*p01, *p11, i01, i11); + wdec14(i00, i01, *px, *p01); + wdec14(i10, i11, *p10, *p11); + } else { + wdec16(*px, *p10, i00, i10); + wdec16(*p01, *p11, i01, i11); + wdec16(i00, i01, *px, *p01); + wdec16(i10, i11, *p10, *p11); + } + } + + // + // Decode (1D) odd column (still in Y loop) + // + + if (nx & p) { + unsigned short *p10 = px + oy1; + + if (w14) + wdec14(*px, *p10, i00, *p10); + else + wdec16(*px, *p10, i00, *p10); + + *px = i00; + } + } + + // + // Decode (1D) odd line (must loop in X) + // + + if (ny & p) { + unsigned short *px = py; + unsigned short *ex = py + ox * (nx - p2); + + for (; px <= ex; px += ox2) { + unsigned short *p01 = px + ox1; + + if (w14) + wdec14(*px, *p01, i00, *p01); + else + wdec16(*px, *p01, i00, *p01); + + *px = i00; + } + } + + // + // Next level + // + + p2 = p; + p >>= 1; + } +} + +//----------------------------------------------------------------------------- +// +// 16-bit Huffman compression and decompression. +// +// The source code in this file is derived from the 8-bit +// Huffman compression and decompression routines written +// by Christian Rouet for his PIZ image file format. +// +//----------------------------------------------------------------------------- + +// Adds some modification for tinyexr. + +const int HUF_ENCBITS = 16; // literal (value) bit length +const int HUF_DECBITS = 14; // decoding bit size (>= 8) + +const int HUF_ENCSIZE = (1 << HUF_ENCBITS) + 1; // encoding table size +const int HUF_DECSIZE = 1 << HUF_DECBITS; // decoding table size +const int HUF_DECMASK = HUF_DECSIZE - 1; + +struct HufDec { // short code long code + //------------------------------- + int len : 8; // code length 0 + int lit : 24; // lit p size + int *p; // 0 lits +}; + +inline long long hufLength(long long code) { return code & 63; } + +inline long long hufCode(long long code) { return code >> 6; } + +inline void outputBits(int nBits, long long bits, long long &c, int &lc, + char *&out) { + c <<= nBits; + lc += nBits; + + c |= bits; + + while (lc >= 8) *out++ = static_cast((c >> (lc -= 8))); +} + +inline long long getBits(int nBits, long long &c, int &lc, const char *&in) { + while (lc < nBits) { + c = (c << 8) | *(reinterpret_cast(in++)); + lc += 8; + } + + lc -= nBits; + return (c >> lc) & ((1 << nBits) - 1); +} + +// +// ENCODING TABLE BUILDING & (UN)PACKING +// + +// +// Build a "canonical" Huffman code table: +// - for each (uncompressed) symbol, hcode contains the length +// of the corresponding code (in the compressed data) +// - canonical codes are computed and stored in hcode +// - the rules for constructing canonical codes are as follows: +// * shorter codes (if filled with zeroes to the right) +// have a numerically higher value than longer codes +// * for codes with the same length, numerical values +// increase with numerical symbol values +// - because the canonical code table can be constructed from +// symbol lengths alone, the code table can be transmitted +// without sending the actual code values +// - see http://www.compressconsult.com/huffman/ +// + +static void hufCanonicalCodeTable(long long hcode[HUF_ENCSIZE]) { + long long n[59]; + + // + // For each i from 0 through 58, count the + // number of different codes of length i, and + // store the count in n[i]. + // + + for (int i = 0; i <= 58; ++i) n[i] = 0; + + for (int i = 0; i < HUF_ENCSIZE; ++i) n[hcode[i]] += 1; + + // + // For each i from 58 through 1, compute the + // numerically lowest code with length i, and + // store that code in n[i]. + // + + long long c = 0; + + for (int i = 58; i > 0; --i) { + long long nc = ((c + n[i]) >> 1); + n[i] = c; + c = nc; + } + + // + // hcode[i] contains the length, l, of the + // code for symbol i. Assign the next available + // code of length l to the symbol and store both + // l and the code in hcode[i]. + // + + for (int i = 0; i < HUF_ENCSIZE; ++i) { + int l = static_cast(hcode[i]); + + if (l > 0) hcode[i] = l | (n[l]++ << 6); + } +} + +// +// Compute Huffman codes (based on frq input) and store them in frq: +// - code structure is : [63:lsb - 6:msb] | [5-0: bit length]; +// - max code length is 58 bits; +// - codes outside the range [im-iM] have a null length (unused values); +// - original frequencies are destroyed; +// - encoding tables are used by hufEncode() and hufBuildDecTable(); +// + +struct FHeapCompare { + bool operator()(long long *a, long long *b) { return *a > *b; } +}; + +static void hufBuildEncTable( + long long *frq, // io: input frequencies [HUF_ENCSIZE], output table + int *im, // o: min frq index + int *iM) // o: max frq index +{ + // + // This function assumes that when it is called, array frq + // indicates the frequency of all possible symbols in the data + // that are to be Huffman-encoded. (frq[i] contains the number + // of occurrences of symbol i in the data.) + // + // The loop below does three things: + // + // 1) Finds the minimum and maximum indices that point + // to non-zero entries in frq: + // + // frq[im] != 0, and frq[i] == 0 for all i < im + // frq[iM] != 0, and frq[i] == 0 for all i > iM + // + // 2) Fills array fHeap with pointers to all non-zero + // entries in frq. + // + // 3) Initializes array hlink such that hlink[i] == i + // for all array entries. + // + + std::vector hlink(HUF_ENCSIZE); + std::vector fHeap(HUF_ENCSIZE); + + *im = 0; + + while (!frq[*im]) (*im)++; + + int nf = 0; + + for (int i = *im; i < HUF_ENCSIZE; i++) { + hlink[i] = i; + + if (frq[i]) { + fHeap[nf] = &frq[i]; + nf++; + *iM = i; + } + } + + // + // Add a pseudo-symbol, with a frequency count of 1, to frq; + // adjust the fHeap and hlink array accordingly. Function + // hufEncode() uses the pseudo-symbol for run-length encoding. + // + + (*iM)++; + frq[*iM] = 1; + fHeap[nf] = &frq[*iM]; + nf++; + + // + // Build an array, scode, such that scode[i] contains the number + // of bits assigned to symbol i. Conceptually this is done by + // constructing a tree whose leaves are the symbols with non-zero + // frequency: + // + // Make a heap that contains all symbols with a non-zero frequency, + // with the least frequent symbol on top. + // + // Repeat until only one symbol is left on the heap: + // + // Take the two least frequent symbols off the top of the heap. + // Create a new node that has first two nodes as children, and + // whose frequency is the sum of the frequencies of the first + // two nodes. Put the new node back into the heap. + // + // The last node left on the heap is the root of the tree. For each + // leaf node, the distance between the root and the leaf is the length + // of the code for the corresponding symbol. + // + // The loop below doesn't actually build the tree; instead we compute + // the distances of the leaves from the root on the fly. When a new + // node is added to the heap, then that node's descendants are linked + // into a single linear list that starts at the new node, and the code + // lengths of the descendants (that is, their distance from the root + // of the tree) are incremented by one. + // + + std::make_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + + std::vector scode(HUF_ENCSIZE); + memset(scode.data(), 0, sizeof(long long) * HUF_ENCSIZE); + + while (nf > 1) { + // + // Find the indices, mm and m, of the two smallest non-zero frq + // values in fHeap, add the smallest frq to the second-smallest + // frq, and remove the smallest frq value from fHeap. + // + + int mm = fHeap[0] - frq; + std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + --nf; + + int m = fHeap[0] - frq; + std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + + frq[m] += frq[mm]; + std::push_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); + + // + // The entries in scode are linked into lists with the + // entries in hlink serving as "next" pointers and with + // the end of a list marked by hlink[j] == j. + // + // Traverse the lists that start at scode[m] and scode[mm]. + // For each element visited, increment the length of the + // corresponding code by one bit. (If we visit scode[j] + // during the traversal, then the code for symbol j becomes + // one bit longer.) + // + // Merge the lists that start at scode[m] and scode[mm] + // into a single list that starts at scode[m]. + // + + // + // Add a bit to all codes in the first list. + // + + for (int j = m;; j = hlink[j]) { + scode[j]++; + + assert(scode[j] <= 58); + + if (hlink[j] == j) { + // + // Merge the two lists. + // + + hlink[j] = mm; + break; + } + } + + // + // Add a bit to all codes in the second list + // + + for (int j = mm;; j = hlink[j]) { + scode[j]++; + + assert(scode[j] <= 58); + + if (hlink[j] == j) break; + } + } + + // + // Build a canonical Huffman code table, replacing the code + // lengths in scode with (code, code length) pairs. Copy the + // code table from scode into frq. + // + + hufCanonicalCodeTable(scode.data()); + memcpy(frq, scode.data(), sizeof(long long) * HUF_ENCSIZE); +} + +// +// Pack an encoding table: +// - only code lengths, not actual codes, are stored +// - runs of zeroes are compressed as follows: +// +// unpacked packed +// -------------------------------- +// 1 zero 0 (6 bits) +// 2 zeroes 59 +// 3 zeroes 60 +// 4 zeroes 61 +// 5 zeroes 62 +// n zeroes (6 or more) 63 n-6 (6 + 8 bits) +// + +const int SHORT_ZEROCODE_RUN = 59; +const int LONG_ZEROCODE_RUN = 63; +const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN; +const int LONGEST_LONG_RUN = 255 + SHORTEST_LONG_RUN; + +static void hufPackEncTable( + const long long *hcode, // i : encoding table [HUF_ENCSIZE] + int im, // i : min hcode index + int iM, // i : max hcode index + char **pcode) // o: ptr to packed table (updated) +{ + char *p = *pcode; + long long c = 0; + int lc = 0; + + for (; im <= iM; im++) { + int l = hufLength(hcode[im]); + + if (l == 0) { + int zerun = 1; + + while ((im < iM) && (zerun < LONGEST_LONG_RUN)) { + if (hufLength(hcode[im + 1]) > 0) break; + im++; + zerun++; + } + + if (zerun >= 2) { + if (zerun >= SHORTEST_LONG_RUN) { + outputBits(6, LONG_ZEROCODE_RUN, c, lc, p); + outputBits(8, zerun - SHORTEST_LONG_RUN, c, lc, p); + } else { + outputBits(6, SHORT_ZEROCODE_RUN + zerun - 2, c, lc, p); + } + continue; + } + } + + outputBits(6, l, c, lc, p); + } + + if (lc > 0) *p++ = (unsigned char)(c << (8 - lc)); + + *pcode = p; +} + +// +// Unpack an encoding table packed by hufPackEncTable(): +// + +static bool hufUnpackEncTable( + const char **pcode, // io: ptr to packed table (updated) + int ni, // i : input size (in bytes) + int im, // i : min hcode index + int iM, // i : max hcode index + long long *hcode) // o: encoding table [HUF_ENCSIZE] +{ + memset(hcode, 0, sizeof(long long) * HUF_ENCSIZE); + + const char *p = *pcode; + long long c = 0; + int lc = 0; + + for (; im <= iM; im++) { + if (p - *pcode >= ni) { + return false; + } + + long long l = hcode[im] = getBits(6, c, lc, p); // code length + + if (l == (long long)LONG_ZEROCODE_RUN) { + if (p - *pcode > ni) { + return false; + } + + int zerun = getBits(8, c, lc, p) + SHORTEST_LONG_RUN; + + if (im + zerun > iM + 1) { + return false; + } + + while (zerun--) hcode[im++] = 0; + + im--; + } else if (l >= (long long)SHORT_ZEROCODE_RUN) { + int zerun = l - SHORT_ZEROCODE_RUN + 2; + + if (im + zerun > iM + 1) { + return false; + } + + while (zerun--) hcode[im++] = 0; + + im--; + } + } + + *pcode = const_cast(p); + + hufCanonicalCodeTable(hcode); + + return true; +} + +// +// DECODING TABLE BUILDING +// + +// +// Clear a newly allocated decoding table so that it contains only zeroes. +// + +static void hufClearDecTable(HufDec *hdecod) // io: (allocated by caller) +// decoding table [HUF_DECSIZE] +{ + for (int i = 0; i < HUF_DECSIZE; i++) { + hdecod[i].len = 0; + hdecod[i].lit = 0; + hdecod[i].p = NULL; + } + // memset(hdecod, 0, sizeof(HufDec) * HUF_DECSIZE); +} + +// +// Build a decoding hash table based on the encoding table hcode: +// - short codes (<= HUF_DECBITS) are resolved with a single table access; +// - long code entry allocations are not optimized, because long codes are +// unfrequent; +// - decoding tables are used by hufDecode(); +// + +static bool hufBuildDecTable(const long long *hcode, // i : encoding table + int im, // i : min index in hcode + int iM, // i : max index in hcode + HufDec *hdecod) // o: (allocated by caller) +// decoding table [HUF_DECSIZE] +{ + // + // Init hashtable & loop on all codes. + // Assumes that hufClearDecTable(hdecod) has already been called. + // + + for (; im <= iM; im++) { + long long c = hufCode(hcode[im]); + int l = hufLength(hcode[im]); + + if (c >> l) { + // + // Error: c is supposed to be an l-bit code, + // but c contains a value that is greater + // than the largest l-bit number. + // + + // invalidTableEntry(); + return false; + } + + if (l > HUF_DECBITS) { + // + // Long code: add a secondary entry + // + + HufDec *pl = hdecod + (c >> (l - HUF_DECBITS)); + + if (pl->len) { + // + // Error: a short code has already + // been stored in table entry *pl. + // + + // invalidTableEntry(); + return false; + } + + pl->lit++; + + if (pl->p) { + int *p = pl->p; + pl->p = new int[pl->lit]; + + for (int i = 0; i < pl->lit - 1; ++i) pl->p[i] = p[i]; + + delete[] p; + } else { + pl->p = new int[1]; + } + + pl->p[pl->lit - 1] = im; + } else if (l) { + // + // Short code: init all primary entries + // + + HufDec *pl = hdecod + (c << (HUF_DECBITS - l)); + + for (long long i = 1ULL << (HUF_DECBITS - l); i > 0; i--, pl++) { + if (pl->len || pl->p) { + // + // Error: a short code or a long code has + // already been stored in table entry *pl. + // + + // invalidTableEntry(); + return false; + } + + pl->len = l; + pl->lit = im; + } + } + } + + return true; +} + +// +// Free the long code entries of a decoding table built by hufBuildDecTable() +// + +static void hufFreeDecTable(HufDec *hdecod) // io: Decoding table +{ + for (int i = 0; i < HUF_DECSIZE; i++) { + if (hdecod[i].p) { + delete[] hdecod[i].p; + hdecod[i].p = 0; + } + } +} + +// +// ENCODING +// + +inline void outputCode(long long code, long long &c, int &lc, char *&out) { + outputBits(hufLength(code), hufCode(code), c, lc, out); +} + +inline void sendCode(long long sCode, int runCount, long long runCode, + long long &c, int &lc, char *&out) { + // + // Output a run of runCount instances of the symbol sCount. + // Output the symbols explicitly, or if that is shorter, output + // the sCode symbol once followed by a runCode symbol and runCount + // expressed as an 8-bit number. + // + + if (hufLength(sCode) + hufLength(runCode) + 8 < hufLength(sCode) * runCount) { + outputCode(sCode, c, lc, out); + outputCode(runCode, c, lc, out); + outputBits(8, runCount, c, lc, out); + } else { + while (runCount-- >= 0) outputCode(sCode, c, lc, out); + } +} + +// +// Encode (compress) ni values based on the Huffman encoding table hcode: +// + +static int hufEncode // return: output size (in bits) + (const long long *hcode, // i : encoding table + const unsigned short *in, // i : uncompressed input buffer + const int ni, // i : input buffer size (in bytes) + int rlc, // i : rl code + char *out) // o: compressed output buffer +{ + char *outStart = out; + long long c = 0; // bits not yet written to out + int lc = 0; // number of valid bits in c (LSB) + int s = in[0]; + int cs = 0; + + // + // Loop on input values + // + + for (int i = 1; i < ni; i++) { + // + // Count same values or send code + // + + if (s == in[i] && cs < 255) { + cs++; + } else { + sendCode(hcode[s], cs, hcode[rlc], c, lc, out); + cs = 0; + } + + s = in[i]; + } + + // + // Send remaining code + // + + sendCode(hcode[s], cs, hcode[rlc], c, lc, out); + + if (lc) *out = (c << (8 - lc)) & 0xff; + + return (out - outStart) * 8 + lc; +} + +// +// DECODING +// + +// +// In order to force the compiler to inline them, +// getChar() and getCode() are implemented as macros +// instead of "inline" functions. +// + +#define getChar(c, lc, in) \ + { \ + c = (c << 8) | *(unsigned char *)(in++); \ + lc += 8; \ + } + +#if 0 +#define getCode(po, rlc, c, lc, in, out, ob, oe) \ + { \ + if (po == rlc) { \ + if (lc < 8) getChar(c, lc, in); \ + \ + lc -= 8; \ + \ + unsigned char cs = (c >> lc); \ + \ + if (out + cs > oe) return false; \ + \ + /* TinyEXR issue 78 */ \ + unsigned short s = out[-1]; \ + \ + while (cs-- > 0) *out++ = s; \ + } else if (out < oe) { \ + *out++ = po; \ + } else { \ + return false; \ + } \ + } +#else +static bool getCode(int po, int rlc, long long &c, int &lc, const char *&in, + const char *in_end, unsigned short *&out, + const unsigned short *ob, const unsigned short *oe) { + (void)ob; + if (po == rlc) { + if (lc < 8) { + /* TinyEXR issue 78 */ + if ((in + 1) >= in_end) { + return false; + } + + getChar(c, lc, in); + } + + lc -= 8; + + unsigned char cs = (c >> lc); + + if (out + cs > oe) return false; + + // Bounds check for safety + // Issue 100. + if ((out - 1) < ob) return false; + unsigned short s = out[-1]; + + while (cs-- > 0) *out++ = s; + } else if (out < oe) { + *out++ = po; + } else { + return false; + } + return true; +} +#endif + +// +// Decode (uncompress) ni bits based on encoding & decoding tables: +// + +static bool hufDecode(const long long *hcode, // i : encoding table + const HufDec *hdecod, // i : decoding table + const char *in, // i : compressed input buffer + int ni, // i : input size (in bits) + int rlc, // i : run-length code + int no, // i : expected output size (in bytes) + unsigned short *out) // o: uncompressed output buffer +{ + long long c = 0; + int lc = 0; + unsigned short *outb = out; // begin + unsigned short *oe = out + no; // end + const char *ie = in + (ni + 7) / 8; // input byte size + + // + // Loop on input bytes + // + + while (in < ie) { + getChar(c, lc, in); + + // + // Access decoding table + // + + while (lc >= HUF_DECBITS) { + const HufDec pl = hdecod[(c >> (lc - HUF_DECBITS)) & HUF_DECMASK]; + + if (pl.len) { + // + // Get short code + // + + lc -= pl.len; + // std::cout << "lit = " << pl.lit << std::endl; + // std::cout << "rlc = " << rlc << std::endl; + // std::cout << "c = " << c << std::endl; + // std::cout << "lc = " << lc << std::endl; + // std::cout << "in = " << in << std::endl; + // std::cout << "out = " << out << std::endl; + // std::cout << "oe = " << oe << std::endl; + if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) { + return false; + } + } else { + if (!pl.p) { + return false; + } + // invalidCode(); // wrong code + + // + // Search long code + // + + int j; + + for (j = 0; j < pl.lit; j++) { + int l = hufLength(hcode[pl.p[j]]); + + while (lc < l && in < ie) // get more bits + getChar(c, lc, in); + + if (lc >= l) { + if (hufCode(hcode[pl.p[j]]) == + ((c >> (lc - l)) & (((long long)(1) << l) - 1))) { + // + // Found : get long code + // + + lc -= l; + if (!getCode(pl.p[j], rlc, c, lc, in, ie, out, outb, oe)) { + return false; + } + break; + } + } + } + + if (j == pl.lit) { + return false; + // invalidCode(); // Not found + } + } + } + } + + // + // Get remaining (short) codes + // + + int i = (8 - ni) & 7; + c >>= i; + lc -= i; + + while (lc > 0) { + const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK]; + + if (pl.len) { + lc -= pl.len; + if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) { + return false; + } + } else { + return false; + // invalidCode(); // wrong (long) code + } + } + + if (out - outb != no) { + return false; + } + // notEnoughData (); + + return true; +} + +static void countFrequencies(std::vector &freq, + const unsigned short data[/*n*/], int n) { + for (int i = 0; i < HUF_ENCSIZE; ++i) freq[i] = 0; + + for (int i = 0; i < n; ++i) ++freq[data[i]]; +} + +static void writeUInt(char buf[4], unsigned int i) { + unsigned char *b = (unsigned char *)buf; + + b[0] = i; + b[1] = i >> 8; + b[2] = i >> 16; + b[3] = i >> 24; +} + +static unsigned int readUInt(const char buf[4]) { + const unsigned char *b = (const unsigned char *)buf; + + return (b[0] & 0x000000ff) | ((b[1] << 8) & 0x0000ff00) | + ((b[2] << 16) & 0x00ff0000) | ((b[3] << 24) & 0xff000000); +} + +// +// EXTERNAL INTERFACE +// + +static int hufCompress(const unsigned short raw[], int nRaw, + char compressed[]) { + if (nRaw == 0) return 0; + + std::vector freq(HUF_ENCSIZE); + + countFrequencies(freq, raw, nRaw); + + int im = 0; + int iM = 0; + hufBuildEncTable(freq.data(), &im, &iM); + + char *tableStart = compressed + 20; + char *tableEnd = tableStart; + hufPackEncTable(freq.data(), im, iM, &tableEnd); + int tableLength = tableEnd - tableStart; + + char *dataStart = tableEnd; + int nBits = hufEncode(freq.data(), raw, nRaw, iM, dataStart); + int data_length = (nBits + 7) / 8; + + writeUInt(compressed, im); + writeUInt(compressed + 4, iM); + writeUInt(compressed + 8, tableLength); + writeUInt(compressed + 12, nBits); + writeUInt(compressed + 16, 0); // room for future extensions + + return dataStart + data_length - compressed; +} + +static bool hufUncompress(const char compressed[], int nCompressed, + std::vector *raw) { + if (nCompressed == 0) { + if (raw->size() != 0) return false; + + return false; + } + + int im = readUInt(compressed); + int iM = readUInt(compressed + 4); + // int tableLength = readUInt (compressed + 8); + int nBits = readUInt(compressed + 12); + + if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE) return false; + + const char *ptr = compressed + 20; + + // + // Fast decoder needs at least 2x64-bits of compressed data, and + // needs to be run-able on this platform. Otherwise, fall back + // to the original decoder + // + + // if (FastHufDecoder::enabled() && nBits > 128) + //{ + // FastHufDecoder fhd (ptr, nCompressed - (ptr - compressed), im, iM, iM); + // fhd.decode ((unsigned char*)ptr, nBits, raw, nRaw); + //} + // else + { + std::vector freq(HUF_ENCSIZE); + std::vector hdec(HUF_DECSIZE); + + hufClearDecTable(&hdec.at(0)); + + hufUnpackEncTable(&ptr, nCompressed - (ptr - compressed), im, iM, + &freq.at(0)); + + { + if (nBits > 8 * (nCompressed - (ptr - compressed))) { + return false; + } + + hufBuildDecTable(&freq.at(0), im, iM, &hdec.at(0)); + hufDecode(&freq.at(0), &hdec.at(0), ptr, nBits, iM, raw->size(), + raw->data()); + } + // catch (...) + //{ + // hufFreeDecTable (hdec); + // throw; + //} + + hufFreeDecTable(&hdec.at(0)); + } + + return true; +} + +// +// Functions to compress the range of values in the pixel data +// + +const int USHORT_RANGE = (1 << 16); +const int BITMAP_SIZE = (USHORT_RANGE >> 3); + +static void bitmapFromData(const unsigned short data[/*nData*/], int nData, + unsigned char bitmap[BITMAP_SIZE], + unsigned short &minNonZero, + unsigned short &maxNonZero) { + for (int i = 0; i < BITMAP_SIZE; ++i) bitmap[i] = 0; + + for (int i = 0; i < nData; ++i) bitmap[data[i] >> 3] |= (1 << (data[i] & 7)); + + bitmap[0] &= ~1; // zero is not explicitly stored in + // the bitmap; we assume that the + // data always contain zeroes + minNonZero = BITMAP_SIZE - 1; + maxNonZero = 0; + + for (int i = 0; i < BITMAP_SIZE; ++i) { + if (bitmap[i]) { + if (minNonZero > i) minNonZero = i; + if (maxNonZero < i) maxNonZero = i; + } + } +} + +static unsigned short forwardLutFromBitmap( + const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { + int k = 0; + + for (int i = 0; i < USHORT_RANGE; ++i) { + if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) + lut[i] = k++; + else + lut[i] = 0; + } + + return k - 1; // maximum value stored in lut[], +} // i.e. number of ones in bitmap minus 1 + +static unsigned short reverseLutFromBitmap( + const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { + int k = 0; + + for (int i = 0; i < USHORT_RANGE; ++i) { + if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) lut[k++] = i; + } + + int n = k - 1; + + while (k < USHORT_RANGE) lut[k++] = 0; + + return n; // maximum k where lut[k] is non-zero, +} // i.e. number of ones in bitmap minus 1 + +static void applyLut(const unsigned short lut[USHORT_RANGE], + unsigned short data[/*nData*/], int nData) { + for (int i = 0; i < nData; ++i) data[i] = lut[data[i]]; +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif // __clang__ + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize, + const unsigned char *inPtr, size_t inSize, + const std::vector &channelInfo, + int data_width, int num_lines) { + std::vector bitmap(BITMAP_SIZE); + unsigned short minNonZero; + unsigned short maxNonZero; + +#if !MINIZ_LITTLE_ENDIAN + // @todo { PIZ compression on BigEndian architecture. } + assert(0); + return false; +#endif + + // Assume `inSize` is multiple of 2 or 4. + std::vector tmpBuffer(inSize / sizeof(unsigned short)); + + std::vector channelData(channelInfo.size()); + unsigned short *tmpBufferEnd = &tmpBuffer.at(0); + + for (size_t c = 0; c < channelData.size(); c++) { + PIZChannelData &cd = channelData[c]; + + cd.start = tmpBufferEnd; + cd.end = cd.start; + + cd.nx = data_width; + cd.ny = num_lines; + // cd.ys = c.channel().ySampling; + + size_t pixelSize = sizeof(int); // UINT and FLOAT + if (channelInfo[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + pixelSize = sizeof(short); + } + + cd.size = static_cast(pixelSize / sizeof(short)); + + tmpBufferEnd += cd.nx * cd.ny * cd.size; + } + + const unsigned char *ptr = inPtr; + for (int y = 0; y < num_lines; ++y) { + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + // if (modp (y, cd.ys) != 0) + // continue; + + size_t n = static_cast(cd.nx * cd.size); + memcpy(cd.end, ptr, n * sizeof(unsigned short)); + ptr += n * sizeof(unsigned short); + cd.end += n; + } + } + + bitmapFromData(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), + bitmap.data(), minNonZero, maxNonZero); + + std::vector lut(USHORT_RANGE); + unsigned short maxValue = forwardLutFromBitmap(bitmap.data(), lut.data()); + applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBuffer.size())); + + // + // Store range compression info in _outBuffer + // + + char *buf = reinterpret_cast(outPtr); + + memcpy(buf, &minNonZero, sizeof(unsigned short)); + buf += sizeof(unsigned short); + memcpy(buf, &maxNonZero, sizeof(unsigned short)); + buf += sizeof(unsigned short); + + if (minNonZero <= maxNonZero) { + memcpy(buf, reinterpret_cast(&bitmap[0] + minNonZero), + maxNonZero - minNonZero + 1); + buf += maxNonZero - minNonZero + 1; + } + + // + // Apply wavelet encoding + // + + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + for (int j = 0; j < cd.size; ++j) { + wav2Encode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, + maxValue); + } + } + + // + // Apply Huffman encoding; append the result to _outBuffer + // + + // length header(4byte), then huff data. Initialize length header with zero, + // then later fill it by `length`. + char *lengthPtr = buf; + int zero = 0; + memcpy(buf, &zero, sizeof(int)); + buf += sizeof(int); + + int length = + hufCompress(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), buf); + memcpy(lengthPtr, &length, sizeof(int)); + + (*outSize) = static_cast( + (reinterpret_cast(buf) - outPtr) + + static_cast(length)); + + // Use uncompressed data when compressed data is larger than uncompressed. + // (Issue 40) + if ((*outSize) >= inSize) { + (*outSize) = static_cast(inSize); + memcpy(outPtr, inPtr, inSize); + } + return true; +} + +static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, + size_t tmpBufSize, size_t inLen, int num_channels, + const EXRChannelInfo *channels, int data_width, + int num_lines) { + if (inLen == tmpBufSize) { + // Data is not compressed(Issue 40). + memcpy(outPtr, inPtr, inLen); + return true; + } + + std::vector bitmap(BITMAP_SIZE); + unsigned short minNonZero; + unsigned short maxNonZero; + +#if !MINIZ_LITTLE_ENDIAN + // @todo { PIZ compression on BigEndian architecture. } + assert(0); + return false; +#endif + + memset(bitmap.data(), 0, BITMAP_SIZE); + + const unsigned char *ptr = inPtr; + // minNonZero = *(reinterpret_cast(ptr)); + tinyexr::cpy2(&minNonZero, reinterpret_cast(ptr)); + // maxNonZero = *(reinterpret_cast(ptr + 2)); + tinyexr::cpy2(&maxNonZero, reinterpret_cast(ptr + 2)); + ptr += 4; + + if (maxNonZero >= BITMAP_SIZE) { + return false; + } + + if (minNonZero <= maxNonZero) { + memcpy(reinterpret_cast(&bitmap[0] + minNonZero), ptr, + maxNonZero - minNonZero + 1); + ptr += maxNonZero - minNonZero + 1; + } + + std::vector lut(USHORT_RANGE); + memset(lut.data(), 0, sizeof(unsigned short) * USHORT_RANGE); + unsigned short maxValue = reverseLutFromBitmap(bitmap.data(), lut.data()); + + // + // Huffman decoding + // + + int length; + + // length = *(reinterpret_cast(ptr)); + tinyexr::cpy4(&length, reinterpret_cast(ptr)); + ptr += sizeof(int); + + if (size_t((ptr - inPtr) + length) > inLen) { + return false; + } + + std::vector tmpBuffer(tmpBufSize); + hufUncompress(reinterpret_cast(ptr), length, &tmpBuffer); + + // + // Wavelet decoding + // + + std::vector channelData(static_cast(num_channels)); + + unsigned short *tmpBufferEnd = &tmpBuffer.at(0); + + for (size_t i = 0; i < static_cast(num_channels); ++i) { + const EXRChannelInfo &chan = channels[i]; + + size_t pixelSize = sizeof(int); // UINT and FLOAT + if (chan.pixel_type == TINYEXR_PIXELTYPE_HALF) { + pixelSize = sizeof(short); + } + + channelData[i].start = tmpBufferEnd; + channelData[i].end = channelData[i].start; + channelData[i].nx = data_width; + channelData[i].ny = num_lines; + // channelData[i].ys = 1; + channelData[i].size = static_cast(pixelSize / sizeof(short)); + + tmpBufferEnd += channelData[i].nx * channelData[i].ny * channelData[i].size; + } + + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + for (int j = 0; j < cd.size; ++j) { + wav2Decode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, + maxValue); + } + } + + // + // Expand the pixel data to their original range + // + + applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBufSize)); + + for (int y = 0; y < num_lines; y++) { + for (size_t i = 0; i < channelData.size(); ++i) { + PIZChannelData &cd = channelData[i]; + + // if (modp (y, cd.ys) != 0) + // continue; + + size_t n = static_cast(cd.nx * cd.size); + memcpy(outPtr, cd.end, static_cast(n * sizeof(unsigned short))); + outPtr += n * sizeof(unsigned short); + cd.end += n; + } + } + + return true; +} +#endif // TINYEXR_USE_PIZ + +#if TINYEXR_USE_ZFP +struct ZFPCompressionParam { + double rate; + int precision; + double tolerance; + int type; // TINYEXR_ZFP_COMPRESSIONTYPE_* + + ZFPCompressionParam() { + type = TINYEXR_ZFP_COMPRESSIONTYPE_RATE; + rate = 2.0; + precision = 0; + tolerance = 0.0f; + } +}; + +bool FindZFPCompressionParam(ZFPCompressionParam *param, + const EXRAttribute *attributes, + int num_attributes) { + bool foundType = false; + + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionType") == 0) && + (attributes[i].size == 1)) { + param->type = static_cast(attributes[i].value[0]); + + foundType = true; + } + } + + if (!foundType) { + return false; + } + + if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionRate") == 0) && + (attributes[i].size == 8)) { + param->rate = *(reinterpret_cast(attributes[i].value)); + return true; + } + } + } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionPrecision") == 0) && + (attributes[i].size == 4)) { + param->rate = *(reinterpret_cast(attributes[i].value)); + return true; + } + } + } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + for (int i = 0; i < num_attributes; i++) { + if ((strcmp(attributes[i].name, "zfpCompressionTolerance") == 0) && + (attributes[i].size == 8)) { + param->tolerance = *(reinterpret_cast(attributes[i].value)); + return true; + } + } + } else { + assert(0); + } + + return false; +} + +// Assume pixel format is FLOAT for all channels. +static bool DecompressZfp(float *dst, int dst_width, int dst_num_lines, + int num_channels, const unsigned char *src, + unsigned long src_size, + const ZFPCompressionParam ¶m) { + size_t uncompressed_size = dst_width * dst_num_lines * num_channels; + + if (uncompressed_size == src_size) { + // Data is not compressed(Issue 40). + memcpy(dst, src, src_size); + } + + zfp_stream *zfp = NULL; + zfp_field *field = NULL; + + assert((dst_width % 4) == 0); + assert((dst_num_lines % 4) == 0); + + if ((dst_width & 3U) || (dst_num_lines & 3U)) { + return false; + } + + field = + zfp_field_2d(reinterpret_cast(const_cast(src)), + zfp_type_float, dst_width, dst_num_lines * num_channels); + zfp = zfp_stream_open(NULL); + + if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + zfp_stream_set_rate(zfp, param.rate, zfp_type_float, /* dimention */ 2, + /* write random access */ 0); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + zfp_stream_set_precision(zfp, param.precision, zfp_type_float); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float); + } else { + assert(0); + } + + size_t buf_size = zfp_stream_maximum_size(zfp, field); + std::vector buf(buf_size); + memcpy(&buf.at(0), src, src_size); + + bitstream *stream = stream_open(&buf.at(0), buf_size); + zfp_stream_set_bit_stream(zfp, stream); + zfp_stream_rewind(zfp); + + size_t image_size = dst_width * dst_num_lines; + + for (int c = 0; c < num_channels; c++) { + // decompress 4x4 pixel block. + for (int y = 0; y < dst_num_lines; y += 4) { + for (int x = 0; x < dst_width; x += 4) { + float fblock[16]; + zfp_decode_block_float_2(zfp, fblock); + for (int j = 0; j < 4; j++) { + for (int i = 0; i < 4; i++) { + dst[c * image_size + ((y + j) * dst_width + (x + i))] = + fblock[j * 4 + i]; + } + } + } + } + } + + zfp_field_free(field); + zfp_stream_close(zfp); + stream_close(stream); + + return true; +} + +// Assume pixel format is FLOAT for all channels. +bool CompressZfp(std::vector *outBuf, unsigned int *outSize, + const float *inPtr, int width, int num_lines, int num_channels, + const ZFPCompressionParam ¶m) { + zfp_stream *zfp = NULL; + zfp_field *field = NULL; + + assert((width % 4) == 0); + assert((num_lines % 4) == 0); + + if ((width & 3U) || (num_lines & 3U)) { + return false; + } + + // create input array. + field = zfp_field_2d(reinterpret_cast(const_cast(inPtr)), + zfp_type_float, width, num_lines * num_channels); + + zfp = zfp_stream_open(NULL); + + if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { + zfp_stream_set_rate(zfp, param.rate, zfp_type_float, 2, 0); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { + zfp_stream_set_precision(zfp, param.precision, zfp_type_float); + } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { + zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float); + } else { + assert(0); + } + + size_t buf_size = zfp_stream_maximum_size(zfp, field); + + outBuf->resize(buf_size); + + bitstream *stream = stream_open(&outBuf->at(0), buf_size); + zfp_stream_set_bit_stream(zfp, stream); + zfp_field_free(field); + + size_t image_size = width * num_lines; + + for (int c = 0; c < num_channels; c++) { + // compress 4x4 pixel block. + for (int y = 0; y < num_lines; y += 4) { + for (int x = 0; x < width; x += 4) { + float fblock[16]; + for (int j = 0; j < 4; j++) { + for (int i = 0; i < 4; i++) { + fblock[j * 4 + i] = + inPtr[c * image_size + ((y + j) * width + (x + i))]; + } + } + zfp_encode_block_float_2(zfp, fblock); + } + } + } + + zfp_stream_flush(zfp); + (*outSize) = zfp_stream_compressed_size(zfp); + + zfp_stream_close(zfp); + + return true; +} + +#endif + +// +// ----------------------------------------------------------------- +// + +// TODO(syoyo): Refactor function arguments. +static bool DecodePixelData(/* out */ unsigned char **out_images, + const int *requested_pixel_types, + const unsigned char *data_ptr, size_t data_len, + int compression_type, int line_order, int width, + int height, int x_stride, int y, int line_no, + int num_lines, size_t pixel_data_size, + size_t num_attributes, + const EXRAttribute *attributes, size_t num_channels, + const EXRChannelInfo *channels, + const std::vector &channel_offset_list) { + if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { // PIZ +#if TINYEXR_USE_PIZ + if ((width == 0) || (num_lines == 0) || (pixel_data_size == 0)) { + // Invalid input #90 + return false; + } + + // Allocate original data size. + std::vector outBuf(static_cast( + static_cast(width * num_lines) * pixel_data_size)); + size_t tmpBufLen = outBuf.size(); + + bool ret = tinyexr::DecompressPiz( + reinterpret_cast(&outBuf.at(0)), data_ptr, tmpBufLen, + data_len, static_cast(num_channels), channels, width, num_lines); + + if (!ret) { + return false; + } + + // For PIZ_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... + for (size_t c = 0; c < static_cast(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + FP16 hf; + + // hf.u = line_ptr[u]; + // use `cpy` to avoid unaligned memory access when compiler's + // optimization is on. + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast(out_images)[c]; + size_t offset = 0; + if (line_order == 0) { + offset = (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + offset = static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + image += offset; + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); + + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast(&outBuf.at( + v * pixel_data_size * static_cast(x_stride) + + channel_offset_list[c] * static_cast(x_stride))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += static_cast( + (height - 1 - (line_no + static_cast(v)))) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + assert(0); + } + } +#else + assert(0 && "PIZ is enabled in this build"); + return false; +#endif + + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS || + compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + // Allocate original data size. + std::vector outBuf(static_cast(width) * + static_cast(num_lines) * + pixel_data_size); + + unsigned long dstLen = static_cast(outBuf.size()); + assert(dstLen > 0); + if (!tinyexr::DecompressZip( + reinterpret_cast(&outBuf.at(0)), &dstLen, data_ptr, + static_cast(data_len))) { + return false; + } + + // For ZIP_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... + for (size_t c = 0; c < static_cast(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast( + &outBuf.at(v * static_cast(pixel_data_size) * + static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + tinyexr::FP16 hf; + + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + tinyexr::FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast(out_images)[c]; + size_t offset = 0; + if (line_order == 0) { + offset = (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + offset = (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + image += offset; + + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); + + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + assert(0); + return false; + } + } + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { + // Allocate original data size. + std::vector outBuf(static_cast(width) * + static_cast(num_lines) * + pixel_data_size); + + unsigned long dstLen = static_cast(outBuf.size()); + if (dstLen == 0) { + return false; + } + + if (!tinyexr::DecompressRle( + reinterpret_cast(&outBuf.at(0)), dstLen, data_ptr, + static_cast(data_len))) { + return false; + } + + // For RLE_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... + for (size_t c = 0; c < static_cast(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast( + &outBuf.at(v * static_cast(pixel_data_size) * + static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + tinyexr::FP16 hf; + + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + tinyexr::FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); + + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + unsigned int val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + // val = line_ptr[u]; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + assert(0); + return false; + } + } + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + tinyexr::ZFPCompressionParam zfp_compression_param; + if (!FindZFPCompressionParam(&zfp_compression_param, attributes, + num_attributes)) { + assert(0); + return false; + } + + // Allocate original data size. + std::vector outBuf(static_cast(width) * + static_cast(num_lines) * + pixel_data_size); + + unsigned long dstLen = outBuf.size(); + assert(dstLen > 0); + tinyexr::DecompressZfp(reinterpret_cast(&outBuf.at(0)), width, + num_lines, num_channels, data_ptr, + static_cast(data_len), + zfp_compression_param); + + // For ZFP_COMPRESSION: + // pixel sample data for channel 0 for scanline 0 + // pixel sample data for channel 1 for scanline 0 + // pixel sample data for channel ... for scanline 0 + // pixel sample data for channel n for scanline 0 + // pixel sample data for channel 0 for scanline 1 + // pixel sample data for channel 1 for scanline 1 + // pixel sample data for channel ... for scanline 1 + // pixel sample data for channel n for scanline 1 + // ... + for (size_t c = 0; c < static_cast(num_channels); c++) { + assert(channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + assert(0); + return false; + } + } +#else + (void)attributes; + (void)num_attributes; + (void)num_channels; + assert(0); + return false; +#endif + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { + for (size_t c = 0; c < num_channels; c++) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + const unsigned short *line_ptr = + reinterpret_cast( + data_ptr + v * pixel_data_size * size_t(width) + + channel_offset_list[c] * static_cast(width)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *outLine = + reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); + } + + for (int u = 0; u < width; u++) { + tinyexr::FP16 hf; + + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + outLine[u] = hf.u; + } + } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + float *outLine = reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); + } + + if (reinterpret_cast(line_ptr + width) > + (data_ptr + data_len)) { + // Insufficient data size + return false; + } + + for (int u = 0; u < width; u++) { + tinyexr::FP16 hf; + + // address may not be aliged. use byte-wise copy for safety.#76 + // hf.u = line_ptr[u]; + tinyexr::cpy2(&(hf.u), line_ptr + u); + + tinyexr::swap2(reinterpret_cast(&hf.u)); + + tinyexr::FP32 f32 = half_to_float(hf); + + outLine[u] = f32.f; + } + } else { + assert(0); + return false; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + const float *line_ptr = reinterpret_cast( + data_ptr + v * pixel_data_size * size_t(width) + + channel_offset_list[c] * static_cast(width)); + + float *outLine = reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); + } + + if (reinterpret_cast(line_ptr + width) > + (data_ptr + data_len)) { + // Insufficient data size + return false; + } + + for (int u = 0; u < width; u++) { + float val; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + outLine[u] = val; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + const unsigned int *line_ptr = reinterpret_cast( + data_ptr + v * pixel_data_size * size_t(width) + + channel_offset_list[c] * static_cast(width)); + + unsigned int *outLine = + reinterpret_cast(out_images[c]); + if (line_order == 0) { + outLine += (size_t(y) + v) * size_t(x_stride); + } else { + outLine += + (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); + } + + for (int u = 0; u < width; u++) { + if (reinterpret_cast(line_ptr + u) >= + (data_ptr + data_len)) { + // Corrupsed data? + return false; + } + + unsigned int val; + tinyexr::cpy4(&val, line_ptr + u); + + tinyexr::swap4(reinterpret_cast(&val)); + + outLine[u] = val; + } + } + } + } + } + + return true; +} + +static bool DecodeTiledPixelData( + unsigned char **out_images, int *width, int *height, + const int *requested_pixel_types, const unsigned char *data_ptr, + size_t data_len, int compression_type, int line_order, int data_width, + int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x, + int tile_size_y, size_t pixel_data_size, size_t num_attributes, + const EXRAttribute *attributes, size_t num_channels, + const EXRChannelInfo *channels, + const std::vector &channel_offset_list) { + assert(tile_offset_x * tile_size_x < data_width); + assert(tile_offset_y * tile_size_y < data_height); + + // Compute actual image size in a tile. + if ((tile_offset_x + 1) * tile_size_x >= data_width) { + (*width) = data_width - (tile_offset_x * tile_size_x); + } else { + (*width) = tile_size_x; + } + + if ((tile_offset_y + 1) * tile_size_y >= data_height) { + (*height) = data_height - (tile_offset_y * tile_size_y); + } else { + (*height) = tile_size_y; + } + + // Image size = tile size. + return DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len, + compression_type, line_order, (*width), tile_size_y, + /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0, + (*height), pixel_data_size, num_attributes, attributes, + num_channels, channels, channel_offset_list); +} + +static bool ComputeChannelLayout(std::vector *channel_offset_list, + int *pixel_data_size, size_t *channel_offset, + int num_channels, + const EXRChannelInfo *channels) { + channel_offset_list->resize(static_cast(num_channels)); + + (*pixel_data_size) = 0; + (*channel_offset) = 0; + + for (size_t c = 0; c < static_cast(num_channels); c++) { + (*channel_offset_list)[c] = (*channel_offset); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + (*pixel_data_size) += sizeof(unsigned short); + (*channel_offset) += sizeof(unsigned short); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + (*pixel_data_size) += sizeof(float); + (*channel_offset) += sizeof(float); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + (*pixel_data_size) += sizeof(unsigned int); + (*channel_offset) += sizeof(unsigned int); + } else { + // ??? + return false; + } + } + return true; +} + +static unsigned char **AllocateImage(int num_channels, + const EXRChannelInfo *channels, + const int *requested_pixel_types, + int data_width, int data_height) { + unsigned char **images = + reinterpret_cast(static_cast( + malloc(sizeof(float *) * static_cast(num_channels)))); + + for (size_t c = 0; c < static_cast(num_channels); c++) { + size_t data_len = + static_cast(data_width) * static_cast(data_height); + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + // pixel_data_size += sizeof(unsigned short); + // channel_offset += sizeof(unsigned short); + // Alloc internal image for half type. + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + images[c] = + reinterpret_cast(static_cast( + malloc(sizeof(unsigned short) * data_len))); + } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + images[c] = reinterpret_cast( + static_cast(malloc(sizeof(float) * data_len))); + } else { + assert(0); + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + // pixel_data_size += sizeof(float); + // channel_offset += sizeof(float); + images[c] = reinterpret_cast( + static_cast(malloc(sizeof(float) * data_len))); + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + // pixel_data_size += sizeof(unsigned int); + // channel_offset += sizeof(unsigned int); + images[c] = reinterpret_cast( + static_cast(malloc(sizeof(unsigned int) * data_len))); + } else { + assert(0); + } + } + + return images; +} + +static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, + const EXRVersion *version, std::string *err, + const unsigned char *buf, size_t size) { + const char *marker = reinterpret_cast(&buf[0]); + + if (empty_header) { + (*empty_header) = false; + } + + if (version->multipart) { + if (size > 0 && marker[0] == '\0') { + // End of header list. + if (empty_header) { + (*empty_header) = true; + } + return TINYEXR_SUCCESS; + } + } + + // According to the spec, the header of every OpenEXR file must contain at + // least the following attributes: + // + // channels chlist + // compression compression + // dataWindow box2i + // displayWindow box2i + // lineOrder lineOrder + // pixelAspectRatio float + // screenWindowCenter v2f + // screenWindowWidth float + bool has_channels = false; + bool has_compression = false; + bool has_data_window = false; + bool has_display_window = false; + bool has_line_order = false; + bool has_pixel_aspect_ratio = false; + bool has_screen_window_center = false; + bool has_screen_window_width = false; + + info->data_window[0] = 0; + info->data_window[1] = 0; + info->data_window[2] = 0; + info->data_window[3] = 0; + info->line_order = 0; // @fixme + info->display_window[0] = 0; + info->display_window[1] = 0; + info->display_window[2] = 0; + info->display_window[3] = 0; + info->screen_window_center[0] = 0.0f; + info->screen_window_center[1] = 0.0f; + info->screen_window_width = -1.0f; + info->pixel_aspect_ratio = -1.0f; + + info->tile_size_x = -1; + info->tile_size_y = -1; + info->tile_level_mode = -1; + info->tile_rounding_mode = -1; + + info->attributes.clear(); + + // Read attributes + size_t orig_size = size; + for (size_t nattr = 0; nattr < TINYEXR_MAX_HEADER_ATTRIBUTES; nattr++) { + if (0 == size) { + if (err) { + (*err) += "Insufficient data size for attributes.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } else if (marker[0] == '\0') { + size--; + break; + } + + std::string attr_name; + std::string attr_type; + std::vector data; + size_t marker_size; + if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, + marker, size)) { + if (err) { + (*err) += "Failed to read attribute.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + marker += marker_size; + size -= marker_size; + + if (version->tiled && attr_name.compare("tiles") == 0) { + unsigned int x_size, y_size; + unsigned char tile_mode; + assert(data.size() == 9); + memcpy(&x_size, &data.at(0), sizeof(int)); + memcpy(&y_size, &data.at(4), sizeof(int)); + tile_mode = data[8]; + tinyexr::swap4(&x_size); + tinyexr::swap4(&y_size); + + info->tile_size_x = static_cast(x_size); + info->tile_size_y = static_cast(y_size); + + // mode = levelMode + roundingMode * 16 + info->tile_level_mode = tile_mode & 0x3; + info->tile_rounding_mode = (tile_mode >> 4) & 0x1; + + } else if (attr_name.compare("compression") == 0) { + bool ok = false; + if (data[0] < TINYEXR_COMPRESSIONTYPE_PIZ) { + ok = true; + } + + if (data[0] == TINYEXR_COMPRESSIONTYPE_PIZ) { +#if TINYEXR_USE_PIZ + ok = true; +#else + if (err) { + (*err) = "PIZ compression is not supported."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; +#endif + } + + if (data[0] == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + ok = true; +#else + if (err) { + (*err) = "ZFP compression is not supported."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; +#endif + } + + if (!ok) { + if (err) { + (*err) = "Unknown compression type."; + } + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + info->compression_type = static_cast(data[0]); + has_compression = true; + + } else if (attr_name.compare("channels") == 0) { + // name: zero-terminated string, from 1 to 255 bytes long + // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2 + // pLinear: unsigned char, possible values are 0 and 1 + // reserved: three chars, should be zero + // xSampling: int + // ySampling: int + + if (!ReadChannelInfo(info->channels, data)) { + if (err) { + (*err) += "Failed to parse channel info.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (info->channels.size() < 1) { + if (err) { + (*err) += "# of channels is zero.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + has_channels = true; + + } else if (attr_name.compare("dataWindow") == 0) { + if (data.size() >= 16) { + memcpy(&info->data_window[0], &data.at(0), sizeof(int)); + memcpy(&info->data_window[1], &data.at(4), sizeof(int)); + memcpy(&info->data_window[2], &data.at(8), sizeof(int)); + memcpy(&info->data_window[3], &data.at(12), sizeof(int)); + tinyexr::swap4(reinterpret_cast(&info->data_window[0])); + tinyexr::swap4(reinterpret_cast(&info->data_window[1])); + tinyexr::swap4(reinterpret_cast(&info->data_window[2])); + tinyexr::swap4(reinterpret_cast(&info->data_window[3])); + has_data_window = true; + } + } else if (attr_name.compare("displayWindow") == 0) { + if (data.size() >= 16) { + memcpy(&info->display_window[0], &data.at(0), sizeof(int)); + memcpy(&info->display_window[1], &data.at(4), sizeof(int)); + memcpy(&info->display_window[2], &data.at(8), sizeof(int)); + memcpy(&info->display_window[3], &data.at(12), sizeof(int)); + tinyexr::swap4( + reinterpret_cast(&info->display_window[0])); + tinyexr::swap4( + reinterpret_cast(&info->display_window[1])); + tinyexr::swap4( + reinterpret_cast(&info->display_window[2])); + tinyexr::swap4( + reinterpret_cast(&info->display_window[3])); + + has_display_window = true; + } + } else if (attr_name.compare("lineOrder") == 0) { + if (data.size() >= 1) { + info->line_order = static_cast(data[0]); + has_line_order = true; + } + } else if (attr_name.compare("pixelAspectRatio") == 0) { + if (data.size() >= sizeof(float)) { + memcpy(&info->pixel_aspect_ratio, &data.at(0), sizeof(float)); + tinyexr::swap4( + reinterpret_cast(&info->pixel_aspect_ratio)); + has_pixel_aspect_ratio = true; + } + } else if (attr_name.compare("screenWindowCenter") == 0) { + if (data.size() >= 8) { + memcpy(&info->screen_window_center[0], &data.at(0), sizeof(float)); + memcpy(&info->screen_window_center[1], &data.at(4), sizeof(float)); + tinyexr::swap4( + reinterpret_cast(&info->screen_window_center[0])); + tinyexr::swap4( + reinterpret_cast(&info->screen_window_center[1])); + has_screen_window_center = true; + } + } else if (attr_name.compare("screenWindowWidth") == 0) { + if (data.size() >= sizeof(float)) { + memcpy(&info->screen_window_width, &data.at(0), sizeof(float)); + tinyexr::swap4( + reinterpret_cast(&info->screen_window_width)); + + has_screen_window_width = true; + } + } else if (attr_name.compare("chunkCount") == 0) { + if (data.size() >= sizeof(int)) { + memcpy(&info->chunk_count, &data.at(0), sizeof(int)); + tinyexr::swap4(reinterpret_cast(&info->chunk_count)); + } + } else { + // Custom attribute(up to TINYEXR_MAX_CUSTOM_ATTRIBUTES) + if (info->attributes.size() < TINYEXR_MAX_CUSTOM_ATTRIBUTES) { + EXRAttribute attrib; +#ifdef _MSC_VER + strncpy_s(attrib.name, attr_name.c_str(), 255); + strncpy_s(attrib.type, attr_type.c_str(), 255); +#else + strncpy(attrib.name, attr_name.c_str(), 255); + strncpy(attrib.type, attr_type.c_str(), 255); +#endif + attrib.name[255] = '\0'; + attrib.type[255] = '\0'; + attrib.size = static_cast(data.size()); + attrib.value = static_cast(malloc(data.size())); + memcpy(reinterpret_cast(attrib.value), &data.at(0), + data.size()); + info->attributes.push_back(attrib); + } + } + } + + // Check if required attributes exist + { + std::stringstream ss_err; + + if (!has_compression) { + ss_err << "\"compression\" attribute not found in the header." + << std::endl; + } + + if (!has_channels) { + ss_err << "\"channels\" attribute not found in the header." << std::endl; + } + + if (!has_line_order) { + ss_err << "\"lineOrder\" attribute not found in the header." << std::endl; + } + + if (!has_display_window) { + ss_err << "\"displayWindow\" attribute not found in the header." + << std::endl; + } + + if (!has_data_window) { + ss_err << "\"dataWindow\" attribute not found in the header or invalid." + << std::endl; + } + + if (!has_pixel_aspect_ratio) { + ss_err << "\"pixelAspectRatio\" attribute not found in the header." + << std::endl; + } + + if (!has_screen_window_width) { + ss_err << "\"screenWindowWidth\" attribute not found in the header." + << std::endl; + } + + if (!has_screen_window_center) { + ss_err << "\"screenWindowCenter\" attribute not found in the header." + << std::endl; + } + + if (!(ss_err.str().empty())) { + if (err) { + (*err) += ss_err.str(); + } + return TINYEXR_ERROR_INVALID_HEADER; + } + } + + info->header_len = static_cast(orig_size - size); + + return TINYEXR_SUCCESS; +} + +// C++ HeaderInfo to C EXRHeader conversion. +static void ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info) { + exr_header->pixel_aspect_ratio = info.pixel_aspect_ratio; + exr_header->screen_window_center[0] = info.screen_window_center[0]; + exr_header->screen_window_center[1] = info.screen_window_center[1]; + exr_header->screen_window_width = info.screen_window_width; + exr_header->chunk_count = info.chunk_count; + exr_header->display_window[0] = info.display_window[0]; + exr_header->display_window[1] = info.display_window[1]; + exr_header->display_window[2] = info.display_window[2]; + exr_header->display_window[3] = info.display_window[3]; + exr_header->data_window[0] = info.data_window[0]; + exr_header->data_window[1] = info.data_window[1]; + exr_header->data_window[2] = info.data_window[2]; + exr_header->data_window[3] = info.data_window[3]; + exr_header->line_order = info.line_order; + exr_header->compression_type = info.compression_type; + + exr_header->tile_size_x = info.tile_size_x; + exr_header->tile_size_y = info.tile_size_y; + exr_header->tile_level_mode = info.tile_level_mode; + exr_header->tile_rounding_mode = info.tile_rounding_mode; + + exr_header->num_channels = static_cast(info.channels.size()); + + exr_header->channels = static_cast(malloc( + sizeof(EXRChannelInfo) * static_cast(exr_header->num_channels))); + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { +#ifdef _MSC_VER + strncpy_s(exr_header->channels[c].name, info.channels[c].name.c_str(), 255); +#else + strncpy(exr_header->channels[c].name, info.channels[c].name.c_str(), 255); +#endif + // manually add '\0' for safety. + exr_header->channels[c].name[255] = '\0'; + + exr_header->channels[c].pixel_type = info.channels[c].pixel_type; + exr_header->channels[c].p_linear = info.channels[c].p_linear; + exr_header->channels[c].x_sampling = info.channels[c].x_sampling; + exr_header->channels[c].y_sampling = info.channels[c].y_sampling; + } + + exr_header->pixel_types = static_cast( + malloc(sizeof(int) * static_cast(exr_header->num_channels))); + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { + exr_header->pixel_types[c] = info.channels[c].pixel_type; + } + + // Initially fill with values of `pixel_types` + exr_header->requested_pixel_types = static_cast( + malloc(sizeof(int) * static_cast(exr_header->num_channels))); + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { + exr_header->requested_pixel_types[c] = info.channels[c].pixel_type; + } + + exr_header->num_custom_attributes = static_cast(info.attributes.size()); + + if (exr_header->num_custom_attributes > 0) { + // TODO(syoyo): Report warning when # of attributes exceeds + // `TINYEXR_MAX_CUSTOM_ATTRIBUTES` + if (exr_header->num_custom_attributes > TINYEXR_MAX_CUSTOM_ATTRIBUTES) { + exr_header->num_custom_attributes = TINYEXR_MAX_CUSTOM_ATTRIBUTES; + } + + exr_header->custom_attributes = static_cast(malloc( + sizeof(EXRAttribute) * size_t(exr_header->num_custom_attributes))); + + for (size_t i = 0; i < info.attributes.size(); i++) { + memcpy(exr_header->custom_attributes[i].name, info.attributes[i].name, + 256); + memcpy(exr_header->custom_attributes[i].type, info.attributes[i].type, + 256); + exr_header->custom_attributes[i].size = info.attributes[i].size; + // Just copy poiner + exr_header->custom_attributes[i].value = info.attributes[i].value; + } + + } else { + exr_header->custom_attributes = NULL; + } + + exr_header->header_len = info.header_len; +} + +static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, + const std::vector &offsets, + const unsigned char *head, const size_t size, + std::string *err) { + int num_channels = exr_header->num_channels; + + int num_scanline_blocks = 1; + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanline_blocks = 32; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanline_blocks = 16; + } + + int data_width = exr_header->data_window[2] - exr_header->data_window[0] + 1; + int data_height = exr_header->data_window[3] - exr_header->data_window[1] + 1; + + if ((data_width < 0) || (data_height < 0)) { + if (err) { + std::stringstream ss; + ss << "Invalid data width or data height: " << data_width << ", " + << data_height << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + + // Do not allow too large data_width and data_height. header invalid? + { + const int threshold = 1024 * 8192; // heuristics + if ((data_width > threshold) || (data_height > threshold)) { + if (err) { + std::stringstream ss; + ss << "data_with or data_height too large. data_width: " << data_width + << ", " + << "data_height = " << data_height << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + } + + size_t num_blocks = offsets.size(); + + std::vector channel_offset_list; + int pixel_data_size = 0; + size_t channel_offset = 0; + if (!tinyexr::ComputeChannelLayout(&channel_offset_list, &pixel_data_size, + &channel_offset, num_channels, + exr_header->channels)) { + if (err) { + (*err) += "Failed to compute channel layout.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + bool invalid_data = false; // TODO(LTE): Use atomic lock for MT safety. + + if (exr_header->tiled) { + // value check + if (exr_header->tile_size_x < 0) { + if (err) { + std::stringstream ss; + ss << "Invalid tile size x : " << exr_header->tile_size_x << "\n"; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_HEADER; + } + + if (exr_header->tile_size_y < 0) { + if (err) { + std::stringstream ss; + ss << "Invalid tile size y : " << exr_header->tile_size_y << "\n"; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_HEADER; + } + + size_t num_tiles = offsets.size(); // = # of blocks + + exr_image->tiles = static_cast( + calloc(sizeof(EXRTile), static_cast(num_tiles))); + + int err_code = TINYEXR_SUCCESS; + +#if (__cplusplus > 199711L) && (TINYEXR_USE_THREAD > 0) + + std::vector workers; + std::atomic tile_count(0); + + int num_threads = std::max(1, int(std::thread::hardware_concurrency())); + if (num_threads > int(num_tiles)) { + num_threads = int(num_tiles); + } + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() { + size_t tile_idx = 0; + while ((tile_idx = tile_count++) < num_tiles) { + +#else + for (size_t tile_idx = 0; tile_idx < num_tiles; tile_idx++) { +#endif + // Allocate memory for each tile. + exr_image->tiles[tile_idx].images = tinyexr::AllocateImage( + num_channels, exr_header->channels, + exr_header->requested_pixel_types, exr_header->tile_size_x, + exr_header->tile_size_y); + + // 16 byte: tile coordinates + // 4 byte : data size + // ~ : data(uncompressed or compressed) + if (offsets[tile_idx] + sizeof(int) * 5 > size) { + // TODO(LTE): atomic + if (err) { + (*err) += "Insufficient data size.\n"; + } + err_code = TINYEXR_ERROR_INVALID_DATA; + break; + } + + size_t data_size = + size_t(size - (offsets[tile_idx] + sizeof(int) * 5)); + const unsigned char *data_ptr = + reinterpret_cast(head + offsets[tile_idx]); + + int tile_coordinates[4]; + memcpy(tile_coordinates, data_ptr, sizeof(int) * 4); + tinyexr::swap4( + reinterpret_cast(&tile_coordinates[0])); + tinyexr::swap4( + reinterpret_cast(&tile_coordinates[1])); + tinyexr::swap4( + reinterpret_cast(&tile_coordinates[2])); + tinyexr::swap4( + reinterpret_cast(&tile_coordinates[3])); + + // @todo{ LoD } + if (tile_coordinates[2] != 0) { + err_code = TINYEXR_ERROR_UNSUPPORTED_FEATURE; + break; + } + if (tile_coordinates[3] != 0) { + err_code = TINYEXR_ERROR_UNSUPPORTED_FEATURE; + break; + } + + int data_len; + memcpy(&data_len, data_ptr + 16, + sizeof(int)); // 16 = sizeof(tile_coordinates) + tinyexr::swap4(reinterpret_cast(&data_len)); + + if (data_len < 4 || size_t(data_len) > data_size) { + // TODO(LTE): atomic + if (err) { + (*err) += "Insufficient data length.\n"; + } + err_code = TINYEXR_ERROR_INVALID_DATA; + break; + } + + // Move to data addr: 20 = 16 + 4; + data_ptr += 20; + + bool ret = tinyexr::DecodeTiledPixelData( + exr_image->tiles[tile_idx].images, + &(exr_image->tiles[tile_idx].width), + &(exr_image->tiles[tile_idx].height), + exr_header->requested_pixel_types, data_ptr, + static_cast(data_len), exr_header->compression_type, + exr_header->line_order, data_width, data_height, + tile_coordinates[0], tile_coordinates[1], exr_header->tile_size_x, + exr_header->tile_size_y, static_cast(pixel_data_size), + static_cast(exr_header->num_custom_attributes), + exr_header->custom_attributes, + static_cast(exr_header->num_channels), + exr_header->channels, channel_offset_list); + + if (!ret) { + // TODO(LTE): atomic + if (err) { + (*err) += "Failed to decode tile data.\n"; + } + err_code = TINYEXR_ERROR_INVALID_DATA; + } + + exr_image->tiles[tile_idx].offset_x = tile_coordinates[0]; + exr_image->tiles[tile_idx].offset_y = tile_coordinates[1]; + exr_image->tiles[tile_idx].level_x = tile_coordinates[2]; + exr_image->tiles[tile_idx].level_y = tile_coordinates[3]; + +#if (__cplusplus > 199711L) && (TINYEXR_USE_THREAD > 0) + } + })); + } // num_thread loop + + for (auto &t : workers) { + t.join(); + } + +#else + } +#endif + + if (err_code != TINYEXR_SUCCESS) { + return err_code; + } + + exr_image->num_tiles = static_cast(num_tiles); + } else { // scanline format + + // Don't allow too large image(256GB * pixel_data_size or more). Workaround + // for #104. + size_t total_data_len = + size_t(data_width) * size_t(data_height) * size_t(num_channels); + const bool total_data_len_overflown = + sizeof(void *) == 8 ? (total_data_len >= 0x4000000000) : false; + if ((total_data_len == 0) || total_data_len_overflown) { + if (err) { + std::stringstream ss; + ss << "Image data size is zero or too large: width = " << data_width + << ", height = " << data_height << ", channels = " << num_channels + << std::endl; + (*err) += ss.str(); + } + return TINYEXR_ERROR_INVALID_DATA; + } + + exr_image->images = tinyexr::AllocateImage( + num_channels, exr_header->channels, exr_header->requested_pixel_types, + data_width, data_height); + +#if (__cplusplus > 199711L) && (TINYEXR_USE_THREAD > 0) + std::vector workers; + std::atomic y_count(0); + + int num_threads = std::max(1, int(std::thread::hardware_concurrency())); + if (num_threads > int(num_blocks)) { + num_threads = int(num_blocks); + } + + for (int t = 0; t < num_threads; t++) { + workers.emplace_back(std::thread([&]() { + int y = 0; + while ((y = y_count++) < int(num_blocks)) { + +#else + +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int y = 0; y < static_cast(num_blocks); y++) { + +#endif + size_t y_idx = static_cast(y); + + if (offsets[y_idx] + sizeof(int) * 2 > size) { + invalid_data = true; + } else { + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(uncompressed or compressed) + size_t data_size = + size_t(size - (offsets[y_idx] + sizeof(int) * 2)); + const unsigned char *data_ptr = + reinterpret_cast(head + offsets[y_idx]); + + int line_no; + memcpy(&line_no, data_ptr, sizeof(int)); + int data_len; + memcpy(&data_len, data_ptr + 4, sizeof(int)); + tinyexr::swap4(reinterpret_cast(&line_no)); + tinyexr::swap4(reinterpret_cast(&data_len)); + + if (size_t(data_len) > data_size) { + invalid_data = true; + + } else if ((line_no > (2 << 20)) || (line_no < -(2 << 20))) { + // Too large value. Assume this is invalid + // 2**20 = 1048576 = heuristic value. + invalid_data = true; + } else if (data_len == 0) { + // TODO(syoyo): May be ok to raise the threshold for example + // `data_len < 4` + invalid_data = true; + } else { + // line_no may be negative. + int end_line_no = (std::min)(line_no + num_scanline_blocks, + (exr_header->data_window[3] + 1)); + + int num_lines = end_line_no - line_no; + + if (num_lines <= 0) { + invalid_data = true; + } else { + // Move to data addr: 8 = 4 + 4; + data_ptr += 8; + + // Adjust line_no with data_window.bmin.y + + // overflow check + tinyexr_int64 lno = + static_cast(line_no) - + static_cast(exr_header->data_window[1]); + if (lno > std::numeric_limits::max()) { + line_no = -1; // invalid + } else if (lno < -std::numeric_limits::max()) { + line_no = -1; // invalid + } else { + line_no -= exr_header->data_window[1]; + } + + if (line_no < 0) { + invalid_data = true; + } else { + if (!tinyexr::DecodePixelData( + exr_image->images, exr_header->requested_pixel_types, + data_ptr, static_cast(data_len), + exr_header->compression_type, exr_header->line_order, + data_width, data_height, data_width, y, line_no, + num_lines, static_cast(pixel_data_size), + static_cast( + exr_header->num_custom_attributes), + exr_header->custom_attributes, + static_cast(exr_header->num_channels), + exr_header->channels, channel_offset_list)) { + invalid_data = true; + } + } + } + } + } + +#if (__cplusplus > 199711L) && (TINYEXR_USE_THREAD > 0) + } + })); + } + + for (auto &t : workers) { + t.join(); + } +#else + } // omp parallel +#endif + } + + if (invalid_data) { + if (err) { + std::stringstream ss; + (*err) += "Invalid data found when decoding pixels.\n"; + } + return TINYEXR_ERROR_INVALID_DATA; + } + + // Overwrite `pixel_type` with `requested_pixel_type`. + { + for (int c = 0; c < exr_header->num_channels; c++) { + exr_header->pixel_types[c] = exr_header->requested_pixel_types[c]; + } + } + + { + exr_image->num_channels = num_channels; + + exr_image->width = data_width; + exr_image->height = data_height; + } + + return TINYEXR_SUCCESS; +} + +static bool ReconstructLineOffsets( + std::vector *offsets, size_t n, + const unsigned char *head, const unsigned char *marker, const size_t size) { + assert(head < marker); + assert(offsets->size() == n); + + for (size_t i = 0; i < n; i++) { + size_t offset = static_cast(marker - head); + // Offset should not exceed whole EXR file/data size. + if ((offset + sizeof(tinyexr::tinyexr_uint64)) >= size) { + return false; + } + + int y; + unsigned int data_len; + + memcpy(&y, marker, sizeof(int)); + memcpy(&data_len, marker + 4, sizeof(unsigned int)); + + if (data_len >= size) { + return false; + } + + tinyexr::swap4(reinterpret_cast(&y)); + tinyexr::swap4(reinterpret_cast(&data_len)); + + (*offsets)[i] = offset; + + marker += data_len + 8; // 8 = 4 bytes(y) + 4 bytes(data_len) + } + + return true; +} + +static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header, + const unsigned char *head, + const unsigned char *marker, const size_t size, + const char **err) { + if (exr_image == NULL || exr_header == NULL || head == NULL || + marker == NULL || (size <= tinyexr::kEXRVersionSize)) { + tinyexr::SetErrorMessage("Invalid argument for DecodeEXRImage().", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + int num_scanline_blocks = 1; + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanline_blocks = 32; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanline_blocks = 16; + } + + int data_width = exr_header->data_window[2] - exr_header->data_window[0]; + if (data_width >= std::numeric_limits::max()) { + // Issue 63 + tinyexr::SetErrorMessage("Invalid data width value", err); + return TINYEXR_ERROR_INVALID_DATA; + } + data_width++; + + int data_height = exr_header->data_window[3] - exr_header->data_window[1]; + if (data_height >= std::numeric_limits::max()) { + tinyexr::SetErrorMessage("Invalid data height value", err); + return TINYEXR_ERROR_INVALID_DATA; + } + data_height++; + + if ((data_width < 0) || (data_height < 0)) { + tinyexr::SetErrorMessage("data width or data height is negative.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + // Do not allow too large data_width and data_height. header invalid? + { + const int threshold = 1024 * 8192; // heuristics + if (data_width > threshold) { + tinyexr::SetErrorMessage("data width too large.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + if (data_height > threshold) { + tinyexr::SetErrorMessage("data height too large.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + + // Read offset tables. + size_t num_blocks = 0; + + if (exr_header->chunk_count > 0) { + // Use `chunkCount` attribute. + num_blocks = static_cast(exr_header->chunk_count); + } else if (exr_header->tiled) { + // @todo { LoD } + size_t num_x_tiles = static_cast(data_width) / + static_cast(exr_header->tile_size_x); + if (num_x_tiles * static_cast(exr_header->tile_size_x) < + static_cast(data_width)) { + num_x_tiles++; + } + size_t num_y_tiles = static_cast(data_height) / + static_cast(exr_header->tile_size_y); + if (num_y_tiles * static_cast(exr_header->tile_size_y) < + static_cast(data_height)) { + num_y_tiles++; + } + + num_blocks = num_x_tiles * num_y_tiles; + } else { + num_blocks = static_cast(data_height) / + static_cast(num_scanline_blocks); + if (num_blocks * static_cast(num_scanline_blocks) < + static_cast(data_height)) { + num_blocks++; + } + } + + std::vector offsets(num_blocks); + + for (size_t y = 0; y < num_blocks; y++) { + tinyexr::tinyexr_uint64 offset; + // Issue #81 + if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) { + tinyexr::SetErrorMessage("Insufficient data size in offset table.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); + tinyexr::swap8(&offset); + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + marker += sizeof(tinyexr::tinyexr_uint64); // = 8 + offsets[y] = offset; + } + + // If line offsets are invalid, we try to reconstruct it. + // See OpenEXR/IlmImf/ImfScanLineInputFile.cpp::readLineOffsets() for details. + for (size_t y = 0; y < num_blocks; y++) { + if (offsets[y] <= 0) { + // TODO(syoyo) Report as warning? + // if (err) { + // stringstream ss; + // ss << "Incomplete lineOffsets." << std::endl; + // (*err) += ss.str(); + //} + bool ret = + ReconstructLineOffsets(&offsets, num_blocks, head, marker, size); + if (ret) { + // OK + break; + } else { + tinyexr::SetErrorMessage( + "Cannot reconstruct lineOffset table in DecodeEXRImage.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + } + + { + std::string e; + int ret = DecodeChunk(exr_image, exr_header, offsets, head, size, &e); + + if (ret != TINYEXR_SUCCESS) { + if (!e.empty()) { + tinyexr::SetErrorMessage(e, err); + } + +#if 1 + FreeEXRImage(exr_image); +#else + // release memory(if exists) + if ((exr_header->num_channels > 0) && exr_image && exr_image->images) { + for (size_t c = 0; c < size_t(exr_header->num_channels); c++) { + if (exr_image->images[c]) { + free(exr_image->images[c]); + exr_image->images[c] = NULL; + } + } + free(exr_image->images); + exr_image->images = NULL; + } +#endif + } + + return ret; + } +} + +static void GetLayers(const EXRHeader& exr_header, std::vector& layer_names) { + // Naive implementation + // Group channels by layers + // go over all channel names, split by periods + // collect unique names + layer_names.clear(); + for (int c = 0; c < exr_header.num_channels; c++) { + std::string full_name(exr_header.channels[c].name); + const size_t pos = full_name.find_last_of('.'); + if (pos != std::string::npos && pos != 0 && pos + 1 < full_name.size()) { + full_name.erase(pos); + if (std::find(layer_names.begin(), layer_names.end(), full_name) == layer_names.end()) + layer_names.push_back(full_name); + } + } +} + +struct LayerChannel { + explicit LayerChannel (size_t i, std::string n) + : index(i) + , name(n) + {} + size_t index; + std::string name; +}; + +static void ChannelsInLayer(const EXRHeader& exr_header, const std::string layer_name, std::vector& channels) { + channels.clear(); + for (int c = 0; c < exr_header.num_channels; c++) { + std::string ch_name(exr_header.channels[c].name); + if (layer_name.empty()) { + const size_t pos = ch_name.find_last_of('.'); + if (pos != std::string::npos && pos < ch_name.size()) { + ch_name = ch_name.substr(pos + 1); + } + } else { + const size_t pos = ch_name.find(layer_name + '.'); + if (pos == std::string::npos) + continue; + if (pos == 0) { + ch_name = ch_name.substr(layer_name.size() + 1); + } + } + LayerChannel ch(size_t(c), ch_name); + channels.push_back(ch); + } +} + +} // namespace tinyexr + +int EXRLayers(const char *filename, const char **layer_names[], int *num_layers, const char **err) { + EXRVersion exr_version; + EXRHeader exr_header; + InitEXRHeader(&exr_header); + + { + int ret = ParseEXRVersionFromFile(&exr_version, filename); + if (ret != TINYEXR_SUCCESS) { + tinyexr::SetErrorMessage("Invalid EXR header.", err); + return ret; + } + + if (exr_version.multipart || exr_version.non_image) { + tinyexr::SetErrorMessage( + "Loading multipart or DeepImage is not supported in LoadEXR() API", + err); + return TINYEXR_ERROR_INVALID_DATA; // @fixme. + } + } + + int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err); + if (ret != TINYEXR_SUCCESS) { + FreeEXRHeader(&exr_header); + return ret; + } + + std::vector layer_vec; + tinyexr::GetLayers(exr_header, layer_vec); + + (*num_layers) = int(layer_vec.size()); + (*layer_names) = static_cast( + malloc(sizeof(const char *) * static_cast(layer_vec.size()))); + for (size_t c = 0; c < static_cast(layer_vec.size()); c++) { +#ifdef _MSC_VER + (*layer_names)[c] = _strdup(layer_vec[c].c_str()); +#else + (*layer_names)[c] = strdup(layer_vec[c].c_str()); +#endif + } + + FreeEXRHeader(&exr_header); + return TINYEXR_SUCCESS; +} + +int LoadEXR(float **out_rgba, int *width, int *height, const char *filename, + const char **err) { + return LoadEXRWithLayer(out_rgba, width, height, filename, /* layername */NULL, err); +} + +int LoadEXRWithLayer(float **out_rgba, int *width, int *height, const char *filename, const char *layername, + const char **err) { + if (out_rgba == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXR()", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRVersion exr_version; + EXRImage exr_image; + EXRHeader exr_header; + InitEXRHeader(&exr_header); + InitEXRImage(&exr_image); + + { + int ret = ParseEXRVersionFromFile(&exr_version, filename); + if (ret != TINYEXR_SUCCESS) { + std::stringstream ss; + ss << "Failed to open EXR file or read version info from EXR file. code(" << ret << ")"; + tinyexr::SetErrorMessage(ss.str(), err); + return ret; + } + + if (exr_version.multipart || exr_version.non_image) { + tinyexr::SetErrorMessage( + "Loading multipart or DeepImage is not supported in LoadEXR() API", + err); + return TINYEXR_ERROR_INVALID_DATA; // @fixme. + } + } + + { + int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err); + if (ret != TINYEXR_SUCCESS) { + FreeEXRHeader(&exr_header); + return ret; + } + } + + // Read HALF channel as FLOAT. + for (int i = 0; i < exr_header.num_channels; i++) { + if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) { + exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; + } + } + + // TODO: Probably limit loading to layers (channels) selected by layer index + { + int ret = LoadEXRImageFromFile(&exr_image, &exr_header, filename, err); + if (ret != TINYEXR_SUCCESS) { + FreeEXRHeader(&exr_header); + return ret; + } + } + + // RGBA + int idxR = -1; + int idxG = -1; + int idxB = -1; + int idxA = -1; + + std::vector layer_names; + tinyexr::GetLayers(exr_header, layer_names); + + std::vector channels; + tinyexr::ChannelsInLayer(exr_header, layername == NULL ? "" : std::string(layername), channels); + + if (channels.size() < 1) { + tinyexr::SetErrorMessage("Layer Not Found", err); + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_LAYER_NOT_FOUND; + } + + size_t ch_count = channels.size() < 4 ? channels.size() : 4; + for (size_t c = 0; c < ch_count; c++) { + const tinyexr::LayerChannel &ch = channels[c]; + + if (ch.name == "R") { + idxR = int(ch.index); + } + else if (ch.name == "G") { + idxG = int(ch.index); + } + else if (ch.name == "B") { + idxB = int(ch.index); + } + else if (ch.name == "A") { + idxA = int(ch.index); + } + } + + if (channels.size() == 1) { + int chIdx = int(channels.front().index); + // Grayscale channel only. + + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + + if (exr_header.tiled) { + for (int it = 0; it < exr_image.num_tiles; it++) { + for (int j = 0; j < exr_header.tile_size_y; j++) { + for (int i = 0; i < exr_header.tile_size_x; i++) { + const int ii = + exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; + const int jj = + exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; + const int idx = ii + jj * exr_image.width; + + // out of region check. + if (ii >= exr_image.width) { + continue; + } + if (jj >= exr_image.height) { + continue; + } + const int srcIdx = i + j * exr_header.tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[chIdx][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[chIdx][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[chIdx][srcIdx]; + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[chIdx][srcIdx]; + } + } + } + } else { + for (int i = 0; i < exr_image.width * exr_image.height; i++) { + const float val = reinterpret_cast(exr_image.images)[chIdx][i]; + (*out_rgba)[4 * i + 0] = val; + (*out_rgba)[4 * i + 1] = val; + (*out_rgba)[4 * i + 2] = val; + (*out_rgba)[4 * i + 3] = val; + } + } + } else { + // Assume RGB(A) + + if (idxR == -1) { + tinyexr::SetErrorMessage("R channel not found", err); + + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxG == -1) { + tinyexr::SetErrorMessage("G channel not found", err); + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxB == -1) { + tinyexr::SetErrorMessage("B channel not found", err); + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + return TINYEXR_ERROR_INVALID_DATA; + } + + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + if (exr_header.tiled) { + for (int it = 0; it < exr_image.num_tiles; it++) { + for (int j = 0; j < exr_header.tile_size_y; j++) { + for (int i = 0; i < exr_header.tile_size_x; i++) { + const int ii = + exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; + const int jj = + exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; + const int idx = ii + jj * exr_image.width; + + // out of region check. + if (ii >= exr_image.width) { + continue; + } + if (jj >= exr_image.height) { + continue; + } + const int srcIdx = i + j * exr_header.tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[idxR][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[idxG][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[idxB][srcIdx]; + if (idxA != -1) { + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[idxA][srcIdx]; + } else { + (*out_rgba)[4 * idx + 3] = 1.0; + } + } + } + } + } else { + for (int i = 0; i < exr_image.width * exr_image.height; i++) { + (*out_rgba)[4 * i + 0] = + reinterpret_cast(exr_image.images)[idxR][i]; + (*out_rgba)[4 * i + 1] = + reinterpret_cast(exr_image.images)[idxG][i]; + (*out_rgba)[4 * i + 2] = + reinterpret_cast(exr_image.images)[idxB][i]; + if (idxA != -1) { + (*out_rgba)[4 * i + 3] = + reinterpret_cast(exr_image.images)[idxA][i]; + } else { + (*out_rgba)[4 * i + 3] = 1.0; + } + } + } + } + + (*width) = exr_image.width; + (*height) = exr_image.height; + + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + + return TINYEXR_SUCCESS; +} + +int IsEXR(const char *filename) { + EXRVersion exr_version; + + int ret = ParseEXRVersionFromFile(&exr_version, filename); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + return TINYEXR_SUCCESS; +} + +int ParseEXRHeaderFromMemory(EXRHeader *exr_header, const EXRVersion *version, + const unsigned char *memory, size_t size, + const char **err) { + if (memory == NULL || exr_header == NULL) { + tinyexr::SetErrorMessage( + "Invalid argument. `memory` or `exr_header` argument is null in " + "ParseEXRHeaderFromMemory()", + err); + + // Invalid argument + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + tinyexr::SetErrorMessage("Insufficient header/data size.\n", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + const unsigned char *marker = memory + tinyexr::kEXRVersionSize; + size_t marker_size = size - tinyexr::kEXRVersionSize; + + tinyexr::HeaderInfo info; + info.clear(); + + std::string err_str; + int ret = ParseEXRHeader(&info, NULL, version, &err_str, marker, marker_size); + + if (ret != TINYEXR_SUCCESS) { + if (err && !err_str.empty()) { + tinyexr::SetErrorMessage(err_str, err); + } + } + + ConvertHeader(exr_header, info); + + // transfoer `tiled` from version. + exr_header->tiled = version->tiled; + + return ret; +} + +int LoadEXRFromMemory(float **out_rgba, int *width, int *height, + const unsigned char *memory, size_t size, + const char **err) { + if (out_rgba == NULL || memory == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXRFromMemory", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRVersion exr_version; + EXRImage exr_image; + EXRHeader exr_header; + + InitEXRHeader(&exr_header); + + int ret = ParseEXRVersionFromMemory(&exr_version, memory, size); + if (ret != TINYEXR_SUCCESS) { + std::stringstream ss; + ss << "Failed to parse EXR version. code(" << ret << ")"; + tinyexr::SetErrorMessage(ss.str(), err); + return ret; + } + + ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + // Read HALF channel as FLOAT. + for (int i = 0; i < exr_header.num_channels; i++) { + if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) { + exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; + } + } + + InitEXRImage(&exr_image); + ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + // RGBA + int idxR = -1; + int idxG = -1; + int idxB = -1; + int idxA = -1; + for (int c = 0; c < exr_header.num_channels; c++) { + if (strcmp(exr_header.channels[c].name, "R") == 0) { + idxR = c; + } else if (strcmp(exr_header.channels[c].name, "G") == 0) { + idxG = c; + } else if (strcmp(exr_header.channels[c].name, "B") == 0) { + idxB = c; + } else if (strcmp(exr_header.channels[c].name, "A") == 0) { + idxA = c; + } + } + + // TODO(syoyo): Refactor removing same code as used in LoadEXR(). + if (exr_header.num_channels == 1) { + // Grayscale channel only. + + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + + if (exr_header.tiled) { + for (int it = 0; it < exr_image.num_tiles; it++) { + for (int j = 0; j < exr_header.tile_size_y; j++) { + for (int i = 0; i < exr_header.tile_size_x; i++) { + const int ii = + exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; + const int jj = + exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; + const int idx = ii + jj * exr_image.width; + + // out of region check. + if (ii >= exr_image.width) { + continue; + } + if (jj >= exr_image.height) { + continue; + } + const int srcIdx = i + j * exr_header.tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[0][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[0][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[0][srcIdx]; + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[0][srcIdx]; + } + } + } + } else { + for (int i = 0; i < exr_image.width * exr_image.height; i++) { + const float val = reinterpret_cast(exr_image.images)[0][i]; + (*out_rgba)[4 * i + 0] = val; + (*out_rgba)[4 * i + 1] = val; + (*out_rgba)[4 * i + 2] = val; + (*out_rgba)[4 * i + 3] = val; + } + } + + } else { + // TODO(syoyo): Support non RGBA image. + + if (idxR == -1) { + tinyexr::SetErrorMessage("R channel not found", err); + + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxG == -1) { + tinyexr::SetErrorMessage("G channel not found", err); + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + if (idxB == -1) { + tinyexr::SetErrorMessage("B channel not found", err); + // @todo { free exr_image } + return TINYEXR_ERROR_INVALID_DATA; + } + + (*out_rgba) = reinterpret_cast( + malloc(4 * sizeof(float) * static_cast(exr_image.width) * + static_cast(exr_image.height))); + + if (exr_header.tiled) { + for (int it = 0; it < exr_image.num_tiles; it++) { + for (int j = 0; j < exr_header.tile_size_y; j++) + for (int i = 0; i < exr_header.tile_size_x; i++) { + const int ii = + exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; + const int jj = + exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; + const int idx = ii + jj * exr_image.width; + + // out of region check. + if (ii >= exr_image.width) { + continue; + } + if (jj >= exr_image.height) { + continue; + } + const int srcIdx = i + j * exr_header.tile_size_x; + unsigned char **src = exr_image.tiles[it].images; + (*out_rgba)[4 * idx + 0] = + reinterpret_cast(src)[idxR][srcIdx]; + (*out_rgba)[4 * idx + 1] = + reinterpret_cast(src)[idxG][srcIdx]; + (*out_rgba)[4 * idx + 2] = + reinterpret_cast(src)[idxB][srcIdx]; + if (idxA != -1) { + (*out_rgba)[4 * idx + 3] = + reinterpret_cast(src)[idxA][srcIdx]; + } else { + (*out_rgba)[4 * idx + 3] = 1.0; + } + } + } + } else { + for (int i = 0; i < exr_image.width * exr_image.height; i++) { + (*out_rgba)[4 * i + 0] = + reinterpret_cast(exr_image.images)[idxR][i]; + (*out_rgba)[4 * i + 1] = + reinterpret_cast(exr_image.images)[idxG][i]; + (*out_rgba)[4 * i + 2] = + reinterpret_cast(exr_image.images)[idxB][i]; + if (idxA != -1) { + (*out_rgba)[4 * i + 3] = + reinterpret_cast(exr_image.images)[idxA][i]; + } else { + (*out_rgba)[4 * i + 3] = 1.0; + } + } + } + } + + (*width) = exr_image.width; + (*height) = exr_image.height; + + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); + + return TINYEXR_SUCCESS; +} + +int LoadEXRImageFromFile(EXRImage *exr_image, const EXRHeader *exr_header, + const char *filename, const char **err) { + if (exr_image == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromFile", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +#ifdef _WIN32 + FILE *fp = NULL; + fopen_s(&fp, filename, "rb"); +#else + FILE *fp = fopen(filename, "rb"); +#endif + if (!fp) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + size_t filesize; + // Compute size + fseek(fp, 0, SEEK_END); + filesize = static_cast(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + if (filesize < 16) { + tinyexr::SetErrorMessage("File size too short " + std::string(filename), + err); + return TINYEXR_ERROR_INVALID_FILE; + } + + std::vector buf(filesize); // @todo { use mmap } + { + size_t ret; + ret = fread(&buf[0], 1, filesize, fp); + assert(ret == filesize); + fclose(fp); + (void)ret; + } + + return LoadEXRImageFromMemory(exr_image, exr_header, &buf.at(0), filesize, + err); +} + +int LoadEXRImageFromMemory(EXRImage *exr_image, const EXRHeader *exr_header, + const unsigned char *memory, const size_t size, + const char **err) { + if (exr_image == NULL || memory == NULL || + (size < tinyexr::kEXRVersionSize)) { + tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromMemory", + err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (exr_header->header_len == 0) { + tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + const unsigned char *head = memory; + const unsigned char *marker = reinterpret_cast( + memory + exr_header->header_len + + 8); // +8 for magic number + version header. + return tinyexr::DecodeEXRImage(exr_image, exr_header, head, marker, size, + err); +} + +size_t SaveEXRImageToMemory(const EXRImage *exr_image, + const EXRHeader *exr_header, + unsigned char **memory_out, const char **err) { + if (exr_image == NULL || memory_out == NULL || + exr_header->compression_type < 0) { + tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToMemory", err); + return 0; + } + +#if !TINYEXR_USE_PIZ + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + tinyexr::SetErrorMessage("PIZ compression is not supported in this build", + err); + return 0; + } +#endif + +#if !TINYEXR_USE_ZFP + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + tinyexr::SetErrorMessage("ZFP compression is not supported in this build", + err); + return 0; + } +#endif + +#if TINYEXR_USE_ZFP + for (size_t i = 0; i < static_cast(exr_header->num_channels); i++) { + if (exr_header->requested_pixel_types[i] != TINYEXR_PIXELTYPE_FLOAT) { + tinyexr::SetErrorMessage("Pixel type must be FLOAT for ZFP compression", + err); + return 0; + } + } +#endif + + std::vector memory; + + // Header + { + const char header[] = {0x76, 0x2f, 0x31, 0x01}; + memory.insert(memory.end(), header, header + 4); + } + + // Version, scanline. + { + char marker[] = {2, 0, 0, 0}; + /* @todo + if (exr_header->tiled) { + marker[1] |= 0x2; + } + if (exr_header->long_name) { + marker[1] |= 0x4; + } + if (exr_header->non_image) { + marker[1] |= 0x8; + } + if (exr_header->multipart) { + marker[1] |= 0x10; + } + */ + memory.insert(memory.end(), marker, marker + 4); + } + + int num_scanlines = 1; + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanlines = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + num_scanlines = 32; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + num_scanlines = 16; + } + + // Write attributes. + std::vector channels; + { + std::vector data; + + for (int c = 0; c < exr_header->num_channels; c++) { + tinyexr::ChannelInfo info; + info.p_linear = 0; + info.pixel_type = exr_header->requested_pixel_types[c]; + info.x_sampling = 1; + info.y_sampling = 1; + info.name = std::string(exr_header->channels[c].name); + channels.push_back(info); + } + + tinyexr::WriteChannelInfo(data, channels); + + tinyexr::WriteAttributeToMemory(&memory, "channels", "chlist", &data.at(0), + static_cast(data.size())); + } + + { + int comp = exr_header->compression_type; + tinyexr::swap4(reinterpret_cast(&comp)); + tinyexr::WriteAttributeToMemory( + &memory, "compression", "compression", + reinterpret_cast(&comp), 1); + } + + { + int data[4] = {0, 0, exr_image->width - 1, exr_image->height - 1}; + tinyexr::swap4(reinterpret_cast(&data[0])); + tinyexr::swap4(reinterpret_cast(&data[1])); + tinyexr::swap4(reinterpret_cast(&data[2])); + tinyexr::swap4(reinterpret_cast(&data[3])); + tinyexr::WriteAttributeToMemory( + &memory, "dataWindow", "box2i", + reinterpret_cast(data), sizeof(int) * 4); + tinyexr::WriteAttributeToMemory( + &memory, "displayWindow", "box2i", + reinterpret_cast(data), sizeof(int) * 4); + } + + { + unsigned char line_order = 0; // @fixme { read line_order from EXRHeader } + tinyexr::WriteAttributeToMemory(&memory, "lineOrder", "lineOrder", + &line_order, 1); + } + + { + float aspectRatio = 1.0f; + tinyexr::swap4(reinterpret_cast(&aspectRatio)); + tinyexr::WriteAttributeToMemory( + &memory, "pixelAspectRatio", "float", + reinterpret_cast(&aspectRatio), sizeof(float)); + } + + { + float center[2] = {0.0f, 0.0f}; + tinyexr::swap4(reinterpret_cast(¢er[0])); + tinyexr::swap4(reinterpret_cast(¢er[1])); + tinyexr::WriteAttributeToMemory( + &memory, "screenWindowCenter", "v2f", + reinterpret_cast(center), 2 * sizeof(float)); + } + + { + float w = static_cast(exr_image->width); + tinyexr::swap4(reinterpret_cast(&w)); + tinyexr::WriteAttributeToMemory(&memory, "screenWindowWidth", "float", + reinterpret_cast(&w), + sizeof(float)); + } + + // Custom attributes + if (exr_header->num_custom_attributes > 0) { + for (int i = 0; i < exr_header->num_custom_attributes; i++) { + tinyexr::WriteAttributeToMemory( + &memory, exr_header->custom_attributes[i].name, + exr_header->custom_attributes[i].type, + reinterpret_cast( + exr_header->custom_attributes[i].value), + exr_header->custom_attributes[i].size); + } + } + + { // end of header + unsigned char e = 0; + memory.push_back(e); + } + + int num_blocks = exr_image->height / num_scanlines; + if (num_blocks * num_scanlines < exr_image->height) { + num_blocks++; + } + + std::vector offsets(static_cast(num_blocks)); + + size_t headerSize = memory.size(); + tinyexr::tinyexr_uint64 offset = + headerSize + + static_cast(num_blocks) * + sizeof( + tinyexr::tinyexr_int64); // sizeof(header) + sizeof(offsetTable) + + std::vector > data_list( + static_cast(num_blocks)); + std::vector channel_offset_list( + static_cast(exr_header->num_channels)); + + int pixel_data_size = 0; + size_t channel_offset = 0; + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { + channel_offset_list[c] = channel_offset; + if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + pixel_data_size += sizeof(unsigned short); + channel_offset += sizeof(unsigned short); + } else if (exr_header->requested_pixel_types[c] == + TINYEXR_PIXELTYPE_FLOAT) { + pixel_data_size += sizeof(float); + channel_offset += sizeof(float); + } else if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT) { + pixel_data_size += sizeof(unsigned int); + channel_offset += sizeof(unsigned int); + } else { + assert(0); + } + } + +#if TINYEXR_USE_ZFP + tinyexr::ZFPCompressionParam zfp_compression_param; + + // Use ZFP compression parameter from custom attributes(if such a parameter + // exists) + { + bool ret = tinyexr::FindZFPCompressionParam( + &zfp_compression_param, exr_header->custom_attributes, + exr_header->num_custom_attributes); + + if (!ret) { + // Use predefined compression parameter. + zfp_compression_param.type = 0; + zfp_compression_param.rate = 2; + } + } +#endif + + // TOOD(LTE): C++11 thread + +// Use signed int since some OpenMP compiler doesn't allow unsigned type for +// `parallel for` +#if TINYEXR_USE_OPENMP +#pragma omp parallel for +#endif + for (int i = 0; i < num_blocks; i++) { + size_t ii = static_cast(i); + int start_y = num_scanlines * i; + int endY = (std::min)(num_scanlines * (i + 1), exr_image->height); + int h = endY - start_y; + + std::vector buf( + static_cast(exr_image->width * h * pixel_data_size)); + + for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { + if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + for (int y = 0; y < h; y++) { + // Assume increasing Y + float *line_ptr = reinterpret_cast(&buf.at( + static_cast(pixel_data_size * y * exr_image->width) + + channel_offset_list[c] * + static_cast(exr_image->width))); + for (int x = 0; x < exr_image->width; x++) { + tinyexr::FP16 h16; + h16.u = reinterpret_cast( + exr_image->images)[c][(y + start_y) * exr_image->width + x]; + + tinyexr::FP32 f32 = half_to_float(h16); + + tinyexr::swap4(reinterpret_cast(&f32.f)); + + // line_ptr[x] = f32.f; + tinyexr::cpy4(line_ptr + x, &(f32.f)); + } + } + } else if (exr_header->requested_pixel_types[c] == + TINYEXR_PIXELTYPE_HALF) { + for (int y = 0; y < h; y++) { + // Assume increasing Y + unsigned short *line_ptr = reinterpret_cast( + &buf.at(static_cast(pixel_data_size * y * + exr_image->width) + + channel_offset_list[c] * + static_cast(exr_image->width))); + for (int x = 0; x < exr_image->width; x++) { + unsigned short val = reinterpret_cast( + exr_image->images)[c][(y + start_y) * exr_image->width + x]; + + tinyexr::swap2(&val); + + // line_ptr[x] = val; + tinyexr::cpy2(line_ptr + x, &val); + } + } + } else { + assert(0); + } + + } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { + if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + for (int y = 0; y < h; y++) { + // Assume increasing Y + unsigned short *line_ptr = reinterpret_cast( + &buf.at(static_cast(pixel_data_size * y * + exr_image->width) + + channel_offset_list[c] * + static_cast(exr_image->width))); + for (int x = 0; x < exr_image->width; x++) { + tinyexr::FP32 f32; + f32.f = reinterpret_cast( + exr_image->images)[c][(y + start_y) * exr_image->width + x]; + + tinyexr::FP16 h16; + h16 = float_to_half_full(f32); + + tinyexr::swap2(reinterpret_cast(&h16.u)); + + // line_ptr[x] = h16.u; + tinyexr::cpy2(line_ptr + x, &(h16.u)); + } + } + } else if (exr_header->requested_pixel_types[c] == + TINYEXR_PIXELTYPE_FLOAT) { + for (int y = 0; y < h; y++) { + // Assume increasing Y + float *line_ptr = reinterpret_cast(&buf.at( + static_cast(pixel_data_size * y * exr_image->width) + + channel_offset_list[c] * + static_cast(exr_image->width))); + for (int x = 0; x < exr_image->width; x++) { + float val = reinterpret_cast( + exr_image->images)[c][(y + start_y) * exr_image->width + x]; + + tinyexr::swap4(reinterpret_cast(&val)); + + // line_ptr[x] = val; + tinyexr::cpy4(line_ptr + x, &val); + } + } + } else { + assert(0); + } + } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_UINT) { + for (int y = 0; y < h; y++) { + // Assume increasing Y + unsigned int *line_ptr = reinterpret_cast(&buf.at( + static_cast(pixel_data_size * y * exr_image->width) + + channel_offset_list[c] * static_cast(exr_image->width))); + for (int x = 0; x < exr_image->width; x++) { + unsigned int val = reinterpret_cast( + exr_image->images)[c][(y + start_y) * exr_image->width + x]; + + tinyexr::swap4(&val); + + // line_ptr[x] = val; + tinyexr::cpy4(line_ptr + x, &val); + } + } + } + } + + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(uncompressed) + std::vector header(8); + unsigned int data_len = static_cast(buf.size()); + memcpy(&header.at(0), &start_y, sizeof(int)); + memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + + tinyexr::swap4(reinterpret_cast(&header.at(0))); + tinyexr::swap4(reinterpret_cast(&header.at(4))); + + data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); + data_list[ii].insert(data_list[ii].end(), buf.begin(), + buf.begin() + data_len); + + } else if ((exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { +#if TINYEXR_USE_MINIZ + std::vector block(tinyexr::miniz::mz_compressBound( + static_cast(buf.size()))); +#else + std::vector block( + compressBound(static_cast(buf.size()))); +#endif + tinyexr::tinyexr_uint64 outSize = block.size(); + + tinyexr::CompressZip(&block.at(0), outSize, + reinterpret_cast(&buf.at(0)), + static_cast(buf.size())); + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + std::vector header(8); + unsigned int data_len = static_cast(outSize); // truncate + memcpy(&header.at(0), &start_y, sizeof(int)); + memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + + tinyexr::swap4(reinterpret_cast(&header.at(0))); + tinyexr::swap4(reinterpret_cast(&header.at(4))); + + data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); + data_list[ii].insert(data_list[ii].end(), block.begin(), + block.begin() + data_len); + + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { + // (buf.size() * 3) / 2 would be enough. + std::vector block((buf.size() * 3) / 2); + + tinyexr::tinyexr_uint64 outSize = block.size(); + + tinyexr::CompressRle(&block.at(0), outSize, + reinterpret_cast(&buf.at(0)), + static_cast(buf.size())); + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + std::vector header(8); + unsigned int data_len = static_cast(outSize); // truncate + memcpy(&header.at(0), &start_y, sizeof(int)); + memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + + tinyexr::swap4(reinterpret_cast(&header.at(0))); + tinyexr::swap4(reinterpret_cast(&header.at(4))); + + data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); + data_list[ii].insert(data_list[ii].end(), block.begin(), + block.begin() + data_len); + + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { +#if TINYEXR_USE_PIZ + unsigned int bufLen = + 8192 + static_cast( + 2 * static_cast( + buf.size())); // @fixme { compute good bound. } + std::vector block(bufLen); + unsigned int outSize = static_cast(block.size()); + + CompressPiz(&block.at(0), &outSize, + reinterpret_cast(&buf.at(0)), + buf.size(), channels, exr_image->width, h); + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + std::vector header(8); + unsigned int data_len = outSize; + memcpy(&header.at(0), &start_y, sizeof(int)); + memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + + tinyexr::swap4(reinterpret_cast(&header.at(0))); + tinyexr::swap4(reinterpret_cast(&header.at(4))); + + data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); + data_list[ii].insert(data_list[ii].end(), block.begin(), + block.begin() + data_len); + +#else + assert(0); +#endif + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { +#if TINYEXR_USE_ZFP + std::vector block; + unsigned int outSize; + + tinyexr::CompressZfp( + &block, &outSize, reinterpret_cast(&buf.at(0)), + exr_image->width, h, exr_header->num_channels, zfp_compression_param); + + // 4 byte: scan line + // 4 byte: data size + // ~ : pixel data(compressed) + std::vector header(8); + unsigned int data_len = outSize; + memcpy(&header.at(0), &start_y, sizeof(int)); + memcpy(&header.at(4), &data_len, sizeof(unsigned int)); + + tinyexr::swap4(reinterpret_cast(&header.at(0))); + tinyexr::swap4(reinterpret_cast(&header.at(4))); + + data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); + data_list[ii].insert(data_list[ii].end(), block.begin(), + block.begin() + data_len); + +#else + assert(0); +#endif + } else { + assert(0); + } + } // omp parallel + + for (size_t i = 0; i < static_cast(num_blocks); i++) { + offsets[i] = offset; + tinyexr::swap8(reinterpret_cast(&offsets[i])); + offset += data_list[i].size(); + } + + size_t totalSize = static_cast(offset); + { + memory.insert( + memory.end(), reinterpret_cast(&offsets.at(0)), + reinterpret_cast(&offsets.at(0)) + + sizeof(tinyexr::tinyexr_uint64) * static_cast(num_blocks)); + } + + if (memory.size() == 0) { + tinyexr::SetErrorMessage("Output memory size is zero", err); + return 0; + } + + (*memory_out) = static_cast(malloc(totalSize)); + memcpy((*memory_out), &memory.at(0), memory.size()); + unsigned char *memory_ptr = *memory_out + memory.size(); + + for (size_t i = 0; i < static_cast(num_blocks); i++) { + memcpy(memory_ptr, &data_list[i].at(0), data_list[i].size()); + memory_ptr += data_list[i].size(); + } + + return totalSize; // OK +} + +int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header, + const char *filename, const char **err) { + if (exr_image == NULL || filename == NULL || + exr_header->compression_type < 0) { + tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToFile", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +#if !TINYEXR_USE_PIZ + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { + tinyexr::SetErrorMessage("PIZ compression is not supported in this build", + err); + return TINYEXR_ERROR_UNSUPPORTED_FEATURE; + } +#endif + +#if !TINYEXR_USE_ZFP + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { + tinyexr::SetErrorMessage("ZFP compression is not supported in this build", + err); + return TINYEXR_ERROR_UNSUPPORTED_FEATURE; + } +#endif + +#ifdef _WIN32 + FILE *fp = NULL; + fopen_s(&fp, filename, "wb"); +#else + FILE *fp = fopen(filename, "wb"); +#endif + if (!fp) { + tinyexr::SetErrorMessage("Cannot write a file", err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + unsigned char *mem = NULL; + size_t mem_size = SaveEXRImageToMemory(exr_image, exr_header, &mem, err); + if (mem_size == 0) { + return TINYEXR_ERROR_SERIALZATION_FAILED; + } + + size_t written_size = 0; + if ((mem_size > 0) && mem) { + written_size = fwrite(mem, 1, mem_size, fp); + } + free(mem); + + fclose(fp); + + if (written_size != mem_size) { + tinyexr::SetErrorMessage("Cannot write a file", err); + return TINYEXR_ERROR_CANT_WRITE_FILE; + } + + return TINYEXR_SUCCESS; +} + +int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { + if (deep_image == NULL) { + tinyexr::SetErrorMessage("Invalid argument for LoadDeepEXR", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +#ifdef _MSC_VER + FILE *fp = NULL; + errno_t errcode = fopen_s(&fp, filename, "rb"); + if ((0 != errcode) || (!fp)) { + tinyexr::SetErrorMessage("Cannot read a file " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } +#else + FILE *fp = fopen(filename, "rb"); + if (!fp) { + tinyexr::SetErrorMessage("Cannot read a file " + std::string(filename), + err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } +#endif + + size_t filesize; + // Compute size + fseek(fp, 0, SEEK_END); + filesize = static_cast(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + if (filesize == 0) { + fclose(fp); + tinyexr::SetErrorMessage("File size is zero : " + std::string(filename), + err); + return TINYEXR_ERROR_INVALID_FILE; + } + + std::vector buf(filesize); // @todo { use mmap } + { + size_t ret; + ret = fread(&buf[0], 1, filesize, fp); + assert(ret == filesize); + (void)ret; + } + fclose(fp); + + const char *head = &buf[0]; + const char *marker = &buf[0]; + + // Header check. + { + const char header[] = {0x76, 0x2f, 0x31, 0x01}; + + if (memcmp(marker, header, 4) != 0) { + tinyexr::SetErrorMessage("Invalid magic number", err); + return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; + } + marker += 4; + } + + // Version, scanline. + { + // ver 2.0, scanline, deep bit on(0x800) + // must be [2, 0, 0, 0] + if (marker[0] != 2 || marker[1] != 8 || marker[2] != 0 || marker[3] != 0) { + tinyexr::SetErrorMessage("Unsupported version or scanline", err); + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + marker += 4; + } + + int dx = -1; + int dy = -1; + int dw = -1; + int dh = -1; + int num_scanline_blocks = 1; // 16 for ZIP compression. + int compression_type = -1; + int num_channels = -1; + std::vector channels; + + // Read attributes + size_t size = filesize - tinyexr::kEXRVersionSize; + for (;;) { + if (0 == size) { + return TINYEXR_ERROR_INVALID_DATA; + } else if (marker[0] == '\0') { + marker++; + size--; + break; + } + + std::string attr_name; + std::string attr_type; + std::vector data; + size_t marker_size; + if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, + marker, size)) { + std::stringstream ss; + ss << "Failed to parse attribute\n"; + tinyexr::SetErrorMessage(ss.str(), err); + return TINYEXR_ERROR_INVALID_DATA; + } + marker += marker_size; + size -= marker_size; + + if (attr_name.compare("compression") == 0) { + compression_type = data[0]; + if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) { + std::stringstream ss; + ss << "Unsupported compression type : " << compression_type; + tinyexr::SetErrorMessage(ss.str(), err); + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { + num_scanline_blocks = 16; + } + + } else if (attr_name.compare("channels") == 0) { + // name: zero-terminated string, from 1 to 255 bytes long + // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2 + // pLinear: unsigned char, possible values are 0 and 1 + // reserved: three chars, should be zero + // xSampling: int + // ySampling: int + + if (!tinyexr::ReadChannelInfo(channels, data)) { + tinyexr::SetErrorMessage("Failed to parse channel info", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + num_channels = static_cast(channels.size()); + + if (num_channels < 1) { + tinyexr::SetErrorMessage("Invalid channels format", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + } else if (attr_name.compare("dataWindow") == 0) { + memcpy(&dx, &data.at(0), sizeof(int)); + memcpy(&dy, &data.at(4), sizeof(int)); + memcpy(&dw, &data.at(8), sizeof(int)); + memcpy(&dh, &data.at(12), sizeof(int)); + tinyexr::swap4(reinterpret_cast(&dx)); + tinyexr::swap4(reinterpret_cast(&dy)); + tinyexr::swap4(reinterpret_cast(&dw)); + tinyexr::swap4(reinterpret_cast(&dh)); + + } else if (attr_name.compare("displayWindow") == 0) { + int x; + int y; + int w; + int h; + memcpy(&x, &data.at(0), sizeof(int)); + memcpy(&y, &data.at(4), sizeof(int)); + memcpy(&w, &data.at(8), sizeof(int)); + memcpy(&h, &data.at(12), sizeof(int)); + tinyexr::swap4(reinterpret_cast(&x)); + tinyexr::swap4(reinterpret_cast(&y)); + tinyexr::swap4(reinterpret_cast(&w)); + tinyexr::swap4(reinterpret_cast(&h)); + } + } + + assert(dx >= 0); + assert(dy >= 0); + assert(dw >= 0); + assert(dh >= 0); + assert(num_channels >= 1); + + int data_width = dw - dx + 1; + int data_height = dh - dy + 1; + + std::vector image( + static_cast(data_width * data_height * 4)); // 4 = RGBA + + // Read offset tables. + int num_blocks = data_height / num_scanline_blocks; + if (num_blocks * num_scanline_blocks < data_height) { + num_blocks++; + } + + std::vector offsets(static_cast(num_blocks)); + + for (size_t y = 0; y < static_cast(num_blocks); y++) { + tinyexr::tinyexr_int64 offset; + memcpy(&offset, marker, sizeof(tinyexr::tinyexr_int64)); + tinyexr::swap8(reinterpret_cast(&offset)); + marker += sizeof(tinyexr::tinyexr_int64); // = 8 + offsets[y] = offset; + } + +#if TINYEXR_USE_PIZ + if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) || + (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ)) { +#else + if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || + (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { +#endif + // OK + } else { + tinyexr::SetErrorMessage("Unsupported compression format", err); + return TINYEXR_ERROR_UNSUPPORTED_FORMAT; + } + + deep_image->image = static_cast( + malloc(sizeof(float **) * static_cast(num_channels))); + for (int c = 0; c < num_channels; c++) { + deep_image->image[c] = static_cast( + malloc(sizeof(float *) * static_cast(data_height))); + for (int y = 0; y < data_height; y++) { + } + } + + deep_image->offset_table = static_cast( + malloc(sizeof(int *) * static_cast(data_height))); + for (int y = 0; y < data_height; y++) { + deep_image->offset_table[y] = static_cast( + malloc(sizeof(int) * static_cast(data_width))); + } + + for (size_t y = 0; y < static_cast(num_blocks); y++) { + const unsigned char *data_ptr = + reinterpret_cast(head + offsets[y]); + + // int: y coordinate + // int64: packed size of pixel offset table + // int64: packed size of sample data + // int64: unpacked size of sample data + // compressed pixel offset table + // compressed sample data + int line_no; + tinyexr::tinyexr_int64 packedOffsetTableSize; + tinyexr::tinyexr_int64 packedSampleDataSize; + tinyexr::tinyexr_int64 unpackedSampleDataSize; + memcpy(&line_no, data_ptr, sizeof(int)); + memcpy(&packedOffsetTableSize, data_ptr + 4, + sizeof(tinyexr::tinyexr_int64)); + memcpy(&packedSampleDataSize, data_ptr + 12, + sizeof(tinyexr::tinyexr_int64)); + memcpy(&unpackedSampleDataSize, data_ptr + 20, + sizeof(tinyexr::tinyexr_int64)); + + tinyexr::swap4(reinterpret_cast(&line_no)); + tinyexr::swap8( + reinterpret_cast(&packedOffsetTableSize)); + tinyexr::swap8( + reinterpret_cast(&packedSampleDataSize)); + tinyexr::swap8( + reinterpret_cast(&unpackedSampleDataSize)); + + std::vector pixelOffsetTable(static_cast(data_width)); + + // decode pixel offset table. + { + unsigned long dstLen = + static_cast(pixelOffsetTable.size() * sizeof(int)); + if (!tinyexr::DecompressZip( + reinterpret_cast(&pixelOffsetTable.at(0)), + &dstLen, data_ptr + 28, + static_cast(packedOffsetTableSize))) { + return false; + } + + assert(dstLen == pixelOffsetTable.size() * sizeof(int)); + for (size_t i = 0; i < static_cast(data_width); i++) { + deep_image->offset_table[y][i] = pixelOffsetTable[i]; + } + } + + std::vector sample_data( + static_cast(unpackedSampleDataSize)); + + // decode sample data. + { + unsigned long dstLen = static_cast(unpackedSampleDataSize); + if (dstLen) { + if (!tinyexr::DecompressZip( + reinterpret_cast(&sample_data.at(0)), &dstLen, + data_ptr + 28 + packedOffsetTableSize, + static_cast(packedSampleDataSize))) { + return false; + } + assert(dstLen == static_cast(unpackedSampleDataSize)); + } + } + + // decode sample + int sampleSize = -1; + std::vector channel_offset_list(static_cast(num_channels)); + { + int channel_offset = 0; + for (size_t i = 0; i < static_cast(num_channels); i++) { + channel_offset_list[i] = channel_offset; + if (channels[i].pixel_type == TINYEXR_PIXELTYPE_UINT) { // UINT + channel_offset += 4; + } else if (channels[i].pixel_type == TINYEXR_PIXELTYPE_HALF) { // half + channel_offset += 2; + } else if (channels[i].pixel_type == + TINYEXR_PIXELTYPE_FLOAT) { // float + channel_offset += 4; + } else { + assert(0); + } + } + sampleSize = channel_offset; + } + assert(sampleSize >= 2); + + assert(static_cast( + pixelOffsetTable[static_cast(data_width - 1)] * + sampleSize) == sample_data.size()); + int samples_per_line = static_cast(sample_data.size()) / sampleSize; + + // + // Alloc memory + // + + // + // pixel data is stored as image[channels][pixel_samples] + // + { + tinyexr::tinyexr_uint64 data_offset = 0; + for (size_t c = 0; c < static_cast(num_channels); c++) { + deep_image->image[c][y] = static_cast( + malloc(sizeof(float) * static_cast(samples_per_line))); + + if (channels[c].pixel_type == 0) { // UINT + for (size_t x = 0; x < static_cast(samples_per_line); x++) { + unsigned int ui; + unsigned int *src_ptr = reinterpret_cast( + &sample_data.at(size_t(data_offset) + x * sizeof(int))); + tinyexr::cpy4(&ui, src_ptr); + deep_image->image[c][y][x] = static_cast(ui); // @fixme + } + data_offset += + sizeof(unsigned int) * static_cast(samples_per_line); + } else if (channels[c].pixel_type == 1) { // half + for (size_t x = 0; x < static_cast(samples_per_line); x++) { + tinyexr::FP16 f16; + const unsigned short *src_ptr = reinterpret_cast( + &sample_data.at(size_t(data_offset) + x * sizeof(short))); + tinyexr::cpy2(&(f16.u), src_ptr); + tinyexr::FP32 f32 = half_to_float(f16); + deep_image->image[c][y][x] = f32.f; + } + data_offset += sizeof(short) * static_cast(samples_per_line); + } else { // float + for (size_t x = 0; x < static_cast(samples_per_line); x++) { + float f; + const float *src_ptr = reinterpret_cast( + &sample_data.at(size_t(data_offset) + x * sizeof(float))); + tinyexr::cpy4(&f, src_ptr); + deep_image->image[c][y][x] = f; + } + data_offset += sizeof(float) * static_cast(samples_per_line); + } + } + } + } // y + + deep_image->width = data_width; + deep_image->height = data_height; + + deep_image->channel_names = static_cast( + malloc(sizeof(const char *) * static_cast(num_channels))); + for (size_t c = 0; c < static_cast(num_channels); c++) { +#ifdef _WIN32 + deep_image->channel_names[c] = _strdup(channels[c].name.c_str()); +#else + deep_image->channel_names[c] = strdup(channels[c].name.c_str()); +#endif + } + deep_image->num_channels = num_channels; + + return TINYEXR_SUCCESS; +} + +void InitEXRImage(EXRImage *exr_image) { + if (exr_image == NULL) { + return; + } + + exr_image->width = 0; + exr_image->height = 0; + exr_image->num_channels = 0; + + exr_image->images = NULL; + exr_image->tiles = NULL; + + exr_image->num_tiles = 0; +} + +void FreeEXRErrorMessage(const char *msg) { + if (msg) { + free(reinterpret_cast(const_cast(msg))); + } + return; +} + +void InitEXRHeader(EXRHeader *exr_header) { + if (exr_header == NULL) { + return; + } + + memset(exr_header, 0, sizeof(EXRHeader)); +} + +int FreeEXRHeader(EXRHeader *exr_header) { + if (exr_header == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (exr_header->channels) { + free(exr_header->channels); + } + + if (exr_header->pixel_types) { + free(exr_header->pixel_types); + } + + if (exr_header->requested_pixel_types) { + free(exr_header->requested_pixel_types); + } + + for (int i = 0; i < exr_header->num_custom_attributes; i++) { + if (exr_header->custom_attributes[i].value) { + free(exr_header->custom_attributes[i].value); + } + } + + if (exr_header->custom_attributes) { + free(exr_header->custom_attributes); + } + + return TINYEXR_SUCCESS; +} + +int FreeEXRImage(EXRImage *exr_image) { + if (exr_image == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + for (int i = 0; i < exr_image->num_channels; i++) { + if (exr_image->images && exr_image->images[i]) { + free(exr_image->images[i]); + } + } + + if (exr_image->images) { + free(exr_image->images); + } + + if (exr_image->tiles) { + for (int tid = 0; tid < exr_image->num_tiles; tid++) { + for (int i = 0; i < exr_image->num_channels; i++) { + if (exr_image->tiles[tid].images && exr_image->tiles[tid].images[i]) { + free(exr_image->tiles[tid].images[i]); + } + } + if (exr_image->tiles[tid].images) { + free(exr_image->tiles[tid].images); + } + } + free(exr_image->tiles); + } + + return TINYEXR_SUCCESS; +} + +int ParseEXRHeaderFromFile(EXRHeader *exr_header, const EXRVersion *exr_version, + const char *filename, const char **err) { + if (exr_header == NULL || exr_version == NULL || filename == NULL) { + tinyexr::SetErrorMessage("Invalid argument for ParseEXRHeaderFromFile", + err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +#ifdef _WIN32 + FILE *fp = NULL; + fopen_s(&fp, filename, "rb"); +#else + FILE *fp = fopen(filename, "rb"); +#endif + if (!fp) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + size_t filesize; + // Compute size + fseek(fp, 0, SEEK_END); + filesize = static_cast(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + std::vector buf(filesize); // @todo { use mmap } + { + size_t ret; + ret = fread(&buf[0], 1, filesize, fp); + assert(ret == filesize); + fclose(fp); + + if (ret != filesize) { + tinyexr::SetErrorMessage("fread() error on " + std::string(filename), + err); + return TINYEXR_ERROR_INVALID_FILE; + } + } + + return ParseEXRHeaderFromMemory(exr_header, exr_version, &buf.at(0), filesize, + err); +} + +int ParseEXRMultipartHeaderFromMemory(EXRHeader ***exr_headers, + int *num_headers, + const EXRVersion *exr_version, + const unsigned char *memory, size_t size, + const char **err) { + if (memory == NULL || exr_headers == NULL || num_headers == NULL || + exr_version == NULL) { + // Invalid argument + tinyexr::SetErrorMessage( + "Invalid argument for ParseEXRMultipartHeaderFromMemory", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + tinyexr::SetErrorMessage("Data size too short", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + const unsigned char *marker = memory + tinyexr::kEXRVersionSize; + size_t marker_size = size - tinyexr::kEXRVersionSize; + + std::vector infos; + + for (;;) { + tinyexr::HeaderInfo info; + info.clear(); + + std::string err_str; + bool empty_header = false; + int ret = ParseEXRHeader(&info, &empty_header, exr_version, &err_str, + marker, marker_size); + + if (ret != TINYEXR_SUCCESS) { + tinyexr::SetErrorMessage(err_str, err); + return ret; + } + + if (empty_header) { + marker += 1; // skip '\0' + break; + } + + // `chunkCount` must exist in the header. + if (info.chunk_count == 0) { + tinyexr::SetErrorMessage( + "`chunkCount' attribute is not found in the header.", err); + return TINYEXR_ERROR_INVALID_DATA; + } + + infos.push_back(info); + + // move to next header. + marker += info.header_len; + size -= info.header_len; + } + + // allocate memory for EXRHeader and create array of EXRHeader pointers. + (*exr_headers) = + static_cast(malloc(sizeof(EXRHeader *) * infos.size())); + for (size_t i = 0; i < infos.size(); i++) { + EXRHeader *exr_header = static_cast(malloc(sizeof(EXRHeader))); + + ConvertHeader(exr_header, infos[i]); + + // transfoer `tiled` from version. + exr_header->tiled = exr_version->tiled; + + (*exr_headers)[i] = exr_header; + } + + (*num_headers) = static_cast(infos.size()); + + return TINYEXR_SUCCESS; +} + +int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers, + const EXRVersion *exr_version, + const char *filename, const char **err) { + if (exr_headers == NULL || num_headers == NULL || exr_version == NULL || + filename == NULL) { + tinyexr::SetErrorMessage( + "Invalid argument for ParseEXRMultipartHeaderFromFile()", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +#ifdef _WIN32 + FILE *fp = NULL; + fopen_s(&fp, filename, "rb"); +#else + FILE *fp = fopen(filename, "rb"); +#endif + if (!fp) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + size_t filesize; + // Compute size + fseek(fp, 0, SEEK_END); + filesize = static_cast(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + std::vector buf(filesize); // @todo { use mmap } + { + size_t ret; + ret = fread(&buf[0], 1, filesize, fp); + assert(ret == filesize); + fclose(fp); + + if (ret != filesize) { + tinyexr::SetErrorMessage("`fread' error. file may be corrupted.", err); + return TINYEXR_ERROR_INVALID_FILE; + } + } + + return ParseEXRMultipartHeaderFromMemory( + exr_headers, num_headers, exr_version, &buf.at(0), filesize, err); +} + +int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory, + size_t size) { + if (version == NULL || memory == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (size < tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_DATA; + } + + const unsigned char *marker = memory; + + // Header check. + { + const char header[] = {0x76, 0x2f, 0x31, 0x01}; + + if (memcmp(marker, header, 4) != 0) { + return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; + } + marker += 4; + } + + version->tiled = false; + version->long_name = false; + version->non_image = false; + version->multipart = false; + + // Parse version header. + { + // must be 2 + if (marker[0] != 2) { + return TINYEXR_ERROR_INVALID_EXR_VERSION; + } + + if (version == NULL) { + return TINYEXR_SUCCESS; // May OK + } + + version->version = 2; + + if (marker[1] & 0x2) { // 9th bit + version->tiled = true; + } + if (marker[1] & 0x4) { // 10th bit + version->long_name = true; + } + if (marker[1] & 0x8) { // 11th bit + version->non_image = true; // (deep image) + } + if (marker[1] & 0x10) { // 12th bit + version->multipart = true; + } + } + + return TINYEXR_SUCCESS; +} + +int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) { + if (filename == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +#ifdef _WIN32 + FILE *fp = NULL; + fopen_s(&fp, filename, "rb"); +#else + FILE *fp = fopen(filename, "rb"); +#endif + if (!fp) { + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + size_t file_size; + // Compute size + fseek(fp, 0, SEEK_END); + file_size = static_cast(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + if (file_size < tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_FILE; + } + + unsigned char buf[tinyexr::kEXRVersionSize]; + size_t ret = fread(&buf[0], 1, tinyexr::kEXRVersionSize, fp); + fclose(fp); + + if (ret != tinyexr::kEXRVersionSize) { + return TINYEXR_ERROR_INVALID_FILE; + } + + return ParseEXRVersionFromMemory(version, buf, tinyexr::kEXRVersionSize); +} + +int LoadEXRMultipartImageFromMemory(EXRImage *exr_images, + const EXRHeader **exr_headers, + unsigned int num_parts, + const unsigned char *memory, + const size_t size, const char **err) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0 || + memory == NULL || (size <= tinyexr::kEXRVersionSize)) { + tinyexr::SetErrorMessage( + "Invalid argument for LoadEXRMultipartImageFromMemory()", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + // compute total header size. + size_t total_header_size = 0; + for (unsigned int i = 0; i < num_parts; i++) { + if (exr_headers[i]->header_len == 0) { + tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + total_header_size += exr_headers[i]->header_len; + } + + const char *marker = reinterpret_cast( + memory + total_header_size + 4 + + 4); // +8 for magic number and version header. + + marker += 1; // Skip empty header. + + // NOTE 1: + // In multipart image, There is 'part number' before chunk data. + // 4 byte : part number + // 4+ : chunk + // + // NOTE 2: + // EXR spec says 'part number' is 'unsigned long' but actually this is + // 'unsigned int(4 bytes)' in OpenEXR implementation... + // http://www.openexr.com/openexrfilelayout.pdf + + // Load chunk offset table. + std::vector > chunk_offset_table_list; + for (size_t i = 0; i < static_cast(num_parts); i++) { + std::vector offset_table( + static_cast(exr_headers[i]->chunk_count)); + + for (size_t c = 0; c < offset_table.size(); c++) { + tinyexr::tinyexr_uint64 offset; + memcpy(&offset, marker, 8); + tinyexr::swap8(&offset); + + if (offset >= size) { + tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", + err); + return TINYEXR_ERROR_INVALID_DATA; + } + + offset_table[c] = offset + 4; // +4 to skip 'part number' + marker += 8; + } + + chunk_offset_table_list.push_back(offset_table); + } + + // Decode image. + for (size_t i = 0; i < static_cast(num_parts); i++) { + std::vector &offset_table = + chunk_offset_table_list[i]; + + // First check 'part number' is identitical to 'i' + for (size_t c = 0; c < offset_table.size(); c++) { + const unsigned char *part_number_addr = + memory + offset_table[c] - 4; // -4 to move to 'part number' field. + unsigned int part_no; + memcpy(&part_no, part_number_addr, sizeof(unsigned int)); // 4 + tinyexr::swap4(&part_no); + + if (part_no != i) { + tinyexr::SetErrorMessage("Invalid `part number' in EXR header chunks.", + err); + return TINYEXR_ERROR_INVALID_DATA; + } + } + + std::string e; + int ret = tinyexr::DecodeChunk(&exr_images[i], exr_headers[i], offset_table, + memory, size, &e); + if (ret != TINYEXR_SUCCESS) { + if (!e.empty()) { + tinyexr::SetErrorMessage(e, err); + } + return ret; + } + } + + return TINYEXR_SUCCESS; +} + +int LoadEXRMultipartImageFromFile(EXRImage *exr_images, + const EXRHeader **exr_headers, + unsigned int num_parts, const char *filename, + const char **err) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0) { + tinyexr::SetErrorMessage( + "Invalid argument for LoadEXRMultipartImageFromFile", err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + +#ifdef _WIN32 + FILE *fp = NULL; + fopen_s(&fp, filename, "rb"); +#else + FILE *fp = fopen(filename, "rb"); +#endif + if (!fp) { + tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); + return TINYEXR_ERROR_CANT_OPEN_FILE; + } + + size_t filesize; + // Compute size + fseek(fp, 0, SEEK_END); + filesize = static_cast(ftell(fp)); + fseek(fp, 0, SEEK_SET); + + std::vector buf(filesize); // @todo { use mmap } + { + size_t ret; + ret = fread(&buf[0], 1, filesize, fp); + assert(ret == filesize); + fclose(fp); + (void)ret; + } + + return LoadEXRMultipartImageFromMemory(exr_images, exr_headers, num_parts, + &buf.at(0), filesize, err); +} + +int SaveEXR(const float *data, int width, int height, int components, + const int save_as_fp16, const char *outfilename, const char **err) { + if ((components == 1) || components == 3 || components == 4) { + // OK + } else { + std::stringstream ss; + ss << "Unsupported component value : " << components << std::endl; + + tinyexr::SetErrorMessage(ss.str(), err); + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + EXRHeader header; + InitEXRHeader(&header); + + if ((width < 16) && (height < 16)) { + // No compression for small image. + header.compression_type = TINYEXR_COMPRESSIONTYPE_NONE; + } else { + header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP; + } + + EXRImage image; + InitEXRImage(&image); + + image.num_channels = components; + + std::vector images[4]; + + if (components == 1) { + images[0].resize(static_cast(width * height)); + memcpy(images[0].data(), data, sizeof(float) * size_t(width * height)); + } else { + images[0].resize(static_cast(width * height)); + images[1].resize(static_cast(width * height)); + images[2].resize(static_cast(width * height)); + images[3].resize(static_cast(width * height)); + + // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers + for (size_t i = 0; i < static_cast(width * height); i++) { + images[0][i] = data[static_cast(components) * i + 0]; + images[1][i] = data[static_cast(components) * i + 1]; + images[2][i] = data[static_cast(components) * i + 2]; + if (components == 4) { + images[3][i] = data[static_cast(components) * i + 3]; + } + } + } + + float *image_ptr[4] = {0, 0, 0, 0}; + if (components == 4) { + image_ptr[0] = &(images[3].at(0)); // A + image_ptr[1] = &(images[2].at(0)); // B + image_ptr[2] = &(images[1].at(0)); // G + image_ptr[3] = &(images[0].at(0)); // R + } else if (components == 3) { + image_ptr[0] = &(images[2].at(0)); // B + image_ptr[1] = &(images[1].at(0)); // G + image_ptr[2] = &(images[0].at(0)); // R + } else if (components == 1) { + image_ptr[0] = &(images[0].at(0)); // A + } + + image.images = reinterpret_cast(image_ptr); + image.width = width; + image.height = height; + + header.num_channels = components; + header.channels = static_cast(malloc( + sizeof(EXRChannelInfo) * static_cast(header.num_channels))); + // Must be (A)BGR order, since most of EXR viewers expect this channel order. + if (components == 4) { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "A", 255); + strncpy_s(header.channels[1].name, "B", 255); + strncpy_s(header.channels[2].name, "G", 255); + strncpy_s(header.channels[3].name, "R", 255); +#else + strncpy(header.channels[0].name, "A", 255); + strncpy(header.channels[1].name, "B", 255); + strncpy(header.channels[2].name, "G", 255); + strncpy(header.channels[3].name, "R", 255); +#endif + header.channels[0].name[strlen("A")] = '\0'; + header.channels[1].name[strlen("B")] = '\0'; + header.channels[2].name[strlen("G")] = '\0'; + header.channels[3].name[strlen("R")] = '\0'; + } else if (components == 3) { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "B", 255); + strncpy_s(header.channels[1].name, "G", 255); + strncpy_s(header.channels[2].name, "R", 255); +#else + strncpy(header.channels[0].name, "B", 255); + strncpy(header.channels[1].name, "G", 255); + strncpy(header.channels[2].name, "R", 255); +#endif + header.channels[0].name[strlen("B")] = '\0'; + header.channels[1].name[strlen("G")] = '\0'; + header.channels[2].name[strlen("R")] = '\0'; + } else { +#ifdef _MSC_VER + strncpy_s(header.channels[0].name, "A", 255); +#else + strncpy(header.channels[0].name, "A", 255); +#endif + header.channels[0].name[strlen("A")] = '\0'; + } + + header.pixel_types = static_cast( + malloc(sizeof(int) * static_cast(header.num_channels))); + header.requested_pixel_types = static_cast( + malloc(sizeof(int) * static_cast(header.num_channels))); + for (int i = 0; i < header.num_channels; i++) { + header.pixel_types[i] = + TINYEXR_PIXELTYPE_FLOAT; // pixel type of input image + + if (save_as_fp16 > 0) { + header.requested_pixel_types[i] = + TINYEXR_PIXELTYPE_HALF; // save with half(fp16) pixel format + } else { + header.requested_pixel_types[i] = + TINYEXR_PIXELTYPE_FLOAT; // save with float(fp32) pixel format(i.e. + // no precision reduction) + } + } + + int ret = SaveEXRImageToFile(&image, &header, outfilename, err); + if (ret != TINYEXR_SUCCESS) { + return ret; + } + + free(header.channels); + free(header.pixel_types); + free(header.requested_pixel_types); + + return ret; +} + +#ifdef __clang__ +// zero-as-null-ppinter-constant +#pragma clang diagnostic pop +#endif + +#endif // TINYEXR_IMPLEMENTATION_DEIFNED +#endif // TINYEXR_IMPLEMENTATION diff --git a/src/VK/offscreen/main.cpp b/src/VK/offscreen/main.cpp new file mode 100644 index 0000000..4c48465 --- /dev/null +++ b/src/VK/offscreen/main.cpp @@ -0,0 +1,102 @@ +#include "Vulkan_FSR2.hpp" + +int main(int argc, const char* argv[]) { + if (argc < 2) { + std::cout << "Usage: FSR2 OffScreen [OPTIONS]\n\n"; + std::cout << "OPTIONS:\n"; + std::cout << " --color_path path of input color images\n"; + std::cout << " --mv_d_path path of motion vector and depth of input images\n"; + std::cout << " --out_path the output path of the result images\n"; + std::cout << " --s_mv_x the scale of x of motion vector\n"; + std::cout << " --s_mv_y the scale of y of motion vector\n"; + std::cout << " --s_depth the scale of depth\n"; + std::cout << " --fps Frames Per Second\n"; + std::cout << " --w output Width\n"; + std::cout << " --h output Height\n"; + std::cout << " --r reset for every frame\n"; + std::cout << " --j enable Jitter\n"; + + return 0; + } + std::vector file_list; + std::string input_path = ""; + std::string motion_depth_path = ""; + std::string out_path = ""; + float scale_motion_x = -0.5f;//default value cause I am working with ue4. + float scale_motion_y = 0.5f; + float scale_depth = 1000.0f; + double deltaTime = 33.3f;//33.3f for 30fps, if running at 60fps, the value passed should be around 16.6f. + float outWidth = 3840; + float outHeight = 2160; + bool reset = false; + bool enableJitter = false; + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--color_path") == 0 && i + 1 < argc) + { + input_path = argv[i + 1]; + } + else if (strcmp(argv[i], "--mv_d_path") == 0 && i + 1 < argc) + { + motion_depth_path = argv[i + 1]; + } + else if (strcmp(argv[i], "--out_path") == 0 && i + 1 < argc) + { + out_path = argv[i + 1]; + } + else if (strcmp(argv[i], "--s_mv_x") == 0 && i + 1 < argc) + { + scale_motion_x = atof(argv[i + 1]); + } + else if (strcmp(argv[i], "--s_mv_y") == 0 && i + 1 < argc) + { + scale_motion_y = atof(argv[i + 1]); + } + else if (strcmp(argv[i], "--s_depth") == 0 && i + 1 < argc) + { + scale_depth = atof(argv[i + 1]); + } + else if (strcmp(argv[i], "--fps") == 0 && i + 1 < argc) + { + double fps = atof(argv[i + 1]); + deltaTime = 1.0f / fps; + } + else if (strcmp(argv[i], "--w") == 0 && i + 1 < argc) + { + outWidth = atoi(argv[i + 1]); + } + else if (strcmp(argv[i], "--h") == 0 && i + 1 < argc) + { + outHeight = atoi(argv[i + 1]); + } + else if (strcmp(argv[i], "--r") == 0) + { + reset = true; + } + else if (strcmp(argv[i], "--j") == 0) + { + enableJitter = true; + } + } + get_files(file_list, input_path); + //copy + config config_fsr; + config_fsr.file_list = file_list; + config_fsr.input_path = input_path; + config_fsr.motion_depth_path = motion_depth_path; + config_fsr.out_path = out_path; + config_fsr.scale_motion_x = scale_motion_x; + config_fsr.scale_motion_y = scale_motion_y; + config_fsr.scale_depth = scale_depth; + config_fsr.deltaTime = deltaTime; + config_fsr.outWidth = outWidth; + config_fsr.outHeight = outHeight; + config_fsr.reset = reset; + config_fsr.enableJitter = enableJitter; + + Vulkan_FSR2 FSR2; + FSR2.run(config_fsr); + + + return EXIT_SUCCESS; +} \ No newline at end of file